Matej Horník
committed on
Commit · 19e6d59
1 Parent(s): 3090a99
feat: docs for api endpoints to generate openapi specification (#3109)
### What problem does this PR solve?

**Added an OpenAPI specification for the API routes. This provides a Swagger UI,
similar to FastAPI's, to make the API easier to explore and use.**

Implemented with the Python package `flasgger`.

Not all routes are included yet, since this is a work in progress.
The docs can be accessed at: `{host}:{port}/apidocs`

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
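As a quick sanity check, the generated spec can also be fetched directly. The sketch below is illustrative only: the host and port and the use of the `requests` package are assumptions, while the `/apidocs` and `/apispec.json` routes come from the Swagger config added in this PR.

```python
# Illustrative only: fetch the Swagger 2.0 document that flasgger serves.
# Host/port are placeholders; /apispec.json is the route configured in this PR.
import requests

spec = requests.get("http://localhost:9380/apispec.json").json()
print(spec["info"]["title"])          # "RAGFlow API"
print(sorted(spec["paths"].keys()))   # the documented endpoints
# The interactive UI is served at http://localhost:9380/apidocs
```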
- api/apps/__init__.py +63 -19
- api/apps/sdk/dataset.py +379 -76
- api/apps/sdk/doc.py +891 -151
- api/apps/system_app.py +188 -26
- api/apps/user_app.py +400 -108
- api/ragflow_server.py +28 -11
- poetry.lock +100 -6
- pyproject.toml +2 -1
api/apps/__init__.py
CHANGED
```diff
@@ -21,6 +21,7 @@ from pathlib import Path
 from flask import Blueprint, Flask
 from werkzeug.wrappers.request import Request
 from flask_cors import CORS
+from flasgger import Swagger

 from api.db import StatusEnum
 from api.db.db_models import close_connection
@@ -34,27 +35,62 @@ from api.settings import API_VERSION, access_logger
 from api.utils.api_utils import server_error_response
 from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer

+__all__ = ["app"]


+logger = logging.getLogger("flask.app")
 for h in access_logger.handlers:
     logger.addHandler(h)

 Request.json = property(lambda self: self.get_json(force=True, silent=True))

 app = Flask(__name__)
+
+# Add this at the beginning of your file to configure Swagger UI
+swagger_config = {
+    "headers": [],
+    "specs": [
+        {
+            "endpoint": "apispec",
+            "route": "/apispec.json",
+            "rule_filter": lambda rule: True,  # Include all endpoints
+            "model_filter": lambda tag: True,  # Include all models
+        }
+    ],
+    "static_url_path": "/flasgger_static",
+    "swagger_ui": True,
+    "specs_route": "/apidocs/",
+}
+
+swagger = Swagger(
+    app,
+    config=swagger_config,
+    template={
+        "swagger": "2.0",
+        "info": {
+            "title": "RAGFlow API",
+            "description": "",
+            "version": "1.0.0",
+        },
+        "securityDefinitions": {
+            "ApiKeyAuth": {"type": "apiKey", "name": "Authorization", "in": "header"}
+        },
+    },
+)
+
+CORS(app, supports_credentials=True, max_age=2592000)
 app.url_map.strict_slashes = False
 app.json_encoder = CustomJSONEncoder
 app.errorhandler(Exception)(server_error_response)


 ## convince for dev and debug
+# app.config["LOGIN_DISABLED"] = True
 app.config["SESSION_PERMANENT"] = False
 app.config["SESSION_TYPE"] = "filesystem"
+app.config["MAX_CONTENT_LENGTH"] = int(
+    os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024)
+)

 Session(app)
 login_manager = LoginManager()
@@ -64,17 +100,23 @@ commands.register_commands(app)


 def search_pages_path(pages_dir):
+    app_path_list = [
+        path for path in pages_dir.glob("*_app.py") if not path.name.startswith(".")
+    ]
+    api_path_list = [
+        path for path in pages_dir.glob("*sdk/*.py") if not path.name.startswith(".")
+    ]
     app_path_list.extend(api_path_list)
     return app_path_list


 def register_page(page_path):
+    path = f"{page_path}"

+    page_name = page_path.stem.rstrip("_app")
+    module_name = ".".join(
+        page_path.parts[page_path.parts.index("api") : -1] + (page_name,)
+    )

     spec = spec_from_file_location(module_name, page_path)
     page = module_from_spec(spec)
@@ -82,8 +124,10 @@ def register_page(page_path):
     page.manager = Blueprint(page_name, module_name)
     sys.modules[module_name] = page
     spec.loader.exec_module(page)
+    page_name = getattr(page, "page_name", page_name)
+    url_prefix = (
+        f"/api/{API_VERSION}" if "/sdk/" in path else f"/{API_VERSION}/{page_name}"
+    )

     app.register_blueprint(page.manager, url_prefix=url_prefix)
     return url_prefix
@@ -91,14 +135,12 @@ def register_page(page_path):

 pages_dir = [
     Path(__file__).parent,
+    Path(__file__).parent.parent / "api" / "apps",
+    Path(__file__).parent.parent / "api" / "apps" / "sdk",
 ]

 client_urls_prefix = [
+    register_page(path) for dir in pages_dir for path in search_pages_path(dir)
 ]


@@ -109,7 +151,9 @@ def load_user(web_request):
     if authorization:
         try:
             access_token = str(jwt.loads(authorization))
+            user = UserService.query(
+                access_token=access_token, status=StatusEnum.VALID.value
+            )
             if user:
                 return user[0]
             else:
@@ -123,4 +167,4 @@ def load_user(web_request):

 @app.teardown_request
 def _db_close(exc):
+    close_connection()
```
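For readers unfamiliar with `flasgger`, the mechanism the change above relies on is that `Swagger(app)` scans each view function's YAML docstring (everything after the `---` marker) and merges it into the generated spec. A minimal, self-contained sketch of that mechanism (not RAGFlow code, default flasgger routes assumed):

```python
# Minimal flasgger sketch: the YAML after "---" in the docstring becomes the
# OpenAPI operation for this route. Run it and open http://localhost:5000/apidocs
from flask import Flask, jsonify
from flasgger import Swagger

app = Flask(__name__)
swagger = Swagger(app)  # default config serves /apidocs/ and /apispec_1.json


@app.route("/ping")
def ping():
    """
    A trivial health-check endpoint.
    ---
    tags:
      - Demo
    responses:
      200:
        description: Service is alive.
    """
    return jsonify(status="ok")


if __name__ == "__main__":
    app.run(port=5000)
```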
api/apps/sdk/dataset.py
CHANGED
```diff
@@ -21,16 +21,72 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import TenantLLMService, LLMService
 from api.db.services.user_service import TenantService
 from api.settings import RetCode
 from api.utils import get_uuid
+from api.utils.api_utils import (
+    get_result,
+    token_required,
+    get_error_data_result,
+    valid,
+    get_parser_config,
+)


+@manager.route("/datasets", methods=["POST"])
 @token_required
 def create(tenant_id):
+    """
+    Create a new dataset.
+    ---
+    tags:
+      - Datasets
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+      - in: body
+        name: body
+        description: Dataset creation parameters.
+        required: true
+        schema:
+          type: object
+          required:
+            - name
+          properties:
+            name:
+              type: string
+              description: Name of the dataset.
+            permission:
+              type: string
+              enum: ['me', 'team']
+              description: Dataset permission.
+            language:
+              type: string
+              enum: ['Chinese', 'English']
+              description: Language of the dataset.
+            chunk_method:
+              type: string
+              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
+                     "presentation", "picture", "one", "knowledge_graph", "email"]
+              description: Chunking method.
+            parser_config:
+              type: object
+              description: Parser configuration.
+    responses:
+      200:
+        description: Successful operation.
+        schema:
+          type: object
+          properties:
+            data:
+              type: object
+    """
     req = request.json
     e, t = TenantService.get_by_id(tenant_id)
     permission = req.get("permission")
@@ -38,49 +94,97 @@ def create(tenant_id):
     chunk_method = req.get("chunk_method")
     parser_config = req.get("parser_config")
     valid_permission = ["me", "team"]
+    valid_language = ["Chinese", "English"]
+    valid_chunk_method = [
+        "naive",
+        "manual",
+        "qa",
+        "table",
+        "paper",
+        "book",
+        "laws",
+        "presentation",
+        "picture",
+        "one",
+        "knowledge_graph",
+        "email",
+    ]
+    check_validation = valid(
+        permission,
+        valid_permission,
+        language,
+        valid_language,
+        chunk_method,
+        valid_chunk_method,
+    )
     if check_validation:
         return check_validation
+    req["parser_config"] = get_parser_config(chunk_method, parser_config)
     if "tenant_id" in req:
+        return get_error_data_result(retmsg="`tenant_id` must not be provided")
     if "chunk_count" in req or "document_count" in req:
         return get_error_data_result(
+            retmsg="`chunk_count` or `document_count` must not be provided"
+        )
+    if "name" not in req:
+        return get_error_data_result(retmsg="`name` is not empty!")
+    req["id"] = get_uuid()
     req["name"] = req["name"].strip()
     if req["name"] == "":
+        return get_error_data_result(retmsg="`name` is not empty string!")
+    if KnowledgebaseService.query(
+        name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value
+    ):
         return get_error_data_result(
+            retmsg="Duplicated dataset name in creating dataset."
+        )
+    req["tenant_id"] = req["created_by"] = tenant_id
     if not req.get("embedding_model"):
+        req["embedding_model"] = t.embd_id
     else:
+        valid_embedding_models = [
+            "BAAI/bge-large-zh-v1.5",
+            "BAAI/bge-base-en-v1.5",
+            "BAAI/bge-large-en-v1.5",
+            "BAAI/bge-small-en-v1.5",
+            "BAAI/bge-small-zh-v1.5",
+            "jinaai/jina-embeddings-v2-base-en",
+            "jinaai/jina-embeddings-v2-small-en",
+            "nomic-ai/nomic-embed-text-v1.5",
+            "sentence-transformers/all-MiniLM-L6-v2",
+            "text-embedding-v2",
+            "text-embedding-v3",
+            "maidalun1020/bce-embedding-base_v1",
+        ]
+        embd_model = LLMService.query(
+            llm_name=req["embedding_model"], model_type="embedding"
+        )
         if not embd_model:
+            return get_error_data_result(
+                f"`embedding_model` {req.get('embedding_model')} doesn't exist"
+            )
         if embd_model:
+            if req[
+                "embedding_model"
+            ] not in valid_embedding_models and not TenantLLMService.query(
+                tenant_id=tenant_id,
+                model_type="embedding",
+                llm_name=req.get("embedding_model"),
+            ):
+                return get_error_data_result(
+                    f"`embedding_model` {req.get('embedding_model')} doesn't exist"
+                )
     key_mapping = {
         "chunk_num": "chunk_count",
         "doc_num": "document_count",
         "parser_id": "chunk_method",
+        "embd_id": "embedding_model",
+    }
+    mapped_keys = {
+        new_key: req[old_key]
+        for new_key, old_key in key_mapping.items()
+        if old_key in req
     }
     req.update(mapped_keys)
     if not KnowledgebaseService.save(**req):
         return get_error_data_result(retmsg="Create dataset error.(Database error)")
@@ -91,21 +195,53 @@ def create(tenant_id):
         renamed_data[new_key] = value
     return get_result(data=renamed_data)


+@manager.route("/datasets", methods=["DELETE"])
 @token_required
 def delete(tenant_id):
+    """
+    Delete datasets.
+    ---
+    tags:
+      - Datasets
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+      - in: body
+        name: body
+        description: Dataset deletion parameters.
+        required: true
+        schema:
+          type: object
+          properties:
+            ids:
+              type: array
+              items:
+                type: string
+              description: List of dataset IDs to delete.
+    responses:
+      200:
+        description: Successful operation.
+        schema:
+          type: object
+    """
     req = request.json
     if not req:
+        ids = None
     else:
+        ids = req.get("ids")
     if not ids:
         id_list = []
+        kbs = KnowledgebaseService.query(tenant_id=tenant_id)
         for kb in kbs:
             id_list.append(kb.id)
     else:
+        id_list = ids
     for id in id_list:
         kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
         if not kbs:
@@ -113,19 +249,75 @@ def delete(tenant_id):
         for doc in DocumentService.query(kb_id=id):
             if not DocumentService.remove_document(doc, tenant_id):
                 return get_error_data_result(
+                    retmsg="Remove document error.(Database error)"
+                )
             f2d = File2DocumentService.get_by_document_id(doc.id)
+            FileService.filter_delete(
+                [
+                    File.source_type == FileSource.KNOWLEDGEBASE,
+                    File.id == f2d[0].file_id,
+                ]
+            )
             File2DocumentService.delete_by_document_id(doc.id)
         if not KnowledgebaseService.delete_by_id(id):
+            return get_error_data_result(retmsg="Delete dataset error.(Database error)")
     return get_result(retcode=RetCode.SUCCESS)


+@manager.route("/datasets/<dataset_id>", methods=["PUT"])
 @token_required
+def update(tenant_id, dataset_id):
+    """
+    Update a dataset.
+    ---
+    tags:
+      - Datasets
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: path
+        name: dataset_id
+        type: string
+        required: true
+        description: ID of the dataset to update.
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+      - in: body
+        name: body
+        description: Dataset update parameters.
+        required: true
+        schema:
+          type: object
+          properties:
+            name:
+              type: string
+              description: New name of the dataset.
+            permission:
+              type: string
+              enum: ['me', 'team']
+              description: Updated permission.
+            language:
+              type: string
+              enum: ['Chinese', 'English']
+              description: Updated language.
+            chunk_method:
+              type: string
+              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
+                     "presentation", "picture", "one", "knowledge_graph", "email"]
+              description: Updated chunking method.
+            parser_config:
+              type: object
+              description: Updated parser configuration.
+    responses:
+      200:
+        description: Successful operation.
+        schema:
+          type: object
+    """
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg="You don't own the dataset")
     req = request.json
     e, t = TenantService.get_by_id(tenant_id)
@@ -138,91 +330,202 @@ def update(tenant_id,dataset_id):
     parser_config = req.get("parser_config")
     valid_permission = ["me", "team"]
     valid_language = ["Chinese", "English"]
+    valid_chunk_method = [
+        "naive",
+        "manual",
+        "qa",
+        "table",
+        "paper",
+        "book",
+        "laws",
+        "presentation",
+        "picture",
+        "one",
+        "knowledge_graph",
+        "email",
+    ]
+    check_validation = valid(
+        permission,
+        valid_permission,
+        language,
+        valid_language,
+        chunk_method,
+        valid_chunk_method,
+    )
     if check_validation:
         return check_validation
     if "tenant_id" in req:
         if req["tenant_id"] != tenant_id:
+            return get_error_data_result(retmsg="Can't change `tenant_id`.")
     e, kb = KnowledgebaseService.get_by_id(dataset_id)
     if "parser_config" in req:
+        temp_dict = kb.parser_config
         temp_dict.update(req["parser_config"])
         req["parser_config"] = temp_dict
     if "chunk_count" in req:
         if req["chunk_count"] != kb.chunk_num:
+            return get_error_data_result(retmsg="Can't change `chunk_count`.")
         req.pop("chunk_count")
     if "document_count" in req:
+        if req["document_count"] != kb.doc_num:
+            return get_error_data_result(retmsg="Can't change `document_count`.")
         req.pop("document_count")
     if "chunk_method" in req:
+        if kb.chunk_num != 0 and req["chunk_method"] != kb.parser_id:
             return get_error_data_result(
+                retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable."
+            )
+        req["parser_id"] = req.pop("chunk_method")
+        if req["parser_id"] != kb.parser_id:
             if not req.get("parser_config"):
                 req["parser_config"] = get_parser_config(chunk_method, parser_config)
     if "embedding_model" in req:
+        if kb.chunk_num != 0 and req["embedding_model"] != kb.embd_id:
             return get_error_data_result(
+                retmsg="If `chunk_count` is not 0, `embedding_model` is not changeable."
+            )
         if not req.get("embedding_model"):
             return get_error_data_result("`embedding_model` can't be empty")
+        valid_embedding_models = [
+            "BAAI/bge-large-zh-v1.5",
+            "BAAI/bge-base-en-v1.5",
+            "BAAI/bge-large-en-v1.5",
+            "BAAI/bge-small-en-v1.5",
+            "BAAI/bge-small-zh-v1.5",
+            "jinaai/jina-embeddings-v2-base-en",
+            "jinaai/jina-embeddings-v2-small-en",
+            "nomic-ai/nomic-embed-text-v1.5",
+            "sentence-transformers/all-MiniLM-L6-v2",
+            "text-embedding-v2",
+            "text-embedding-v3",
+            "maidalun1020/bce-embedding-base_v1",
+        ]
+        embd_model = LLMService.query(
+            llm_name=req["embedding_model"], model_type="embedding"
+        )
         if not embd_model:
+            return get_error_data_result(
+                f"`embedding_model` {req.get('embedding_model')} doesn't exist"
+            )
         if embd_model:
+            if req[
+                "embedding_model"
+            ] not in valid_embedding_models and not TenantLLMService.query(
+                tenant_id=tenant_id,
+                model_type="embedding",
+                llm_name=req.get("embedding_model"),
+            ):
+                return get_error_data_result(
+                    f"`embedding_model` {req.get('embedding_model')} doesn't exist"
+                )
+        req["embd_id"] = req.pop("embedding_model")
     if "name" in req:
         req["name"] = req["name"].strip()
+        if (
+            req["name"].lower() != kb.name.lower()
+            and len(
+                KnowledgebaseService.query(
+                    name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value
+                )
+            )
+            > 0
+        ):
             return get_error_data_result(
+                retmsg="Duplicated dataset name in updating dataset."
+            )
     if not KnowledgebaseService.update_by_id(kb.id, req):
         return get_error_data_result(retmsg="Update dataset error.(Database error)")
     return get_result(retcode=RetCode.SUCCESS)


+@manager.route("/datasets", methods=["GET"])
 @token_required
 def list(tenant_id):
+    """
+    List datasets.
+    ---
+    tags:
+      - Datasets
+    security:
+      - ApiKeyAuth: []
+    parameters:
+      - in: query
+        name: id
+        type: string
+        required: false
+        description: Dataset ID to filter.
+      - in: query
+        name: name
+        type: string
+        required: false
+        description: Dataset name to filter.
+      - in: query
+        name: page
+        type: integer
+        required: false
+        default: 1
+        description: Page number.
+      - in: query
+        name: page_size
+        type: integer
+        required: false
+        default: 1024
+        description: Number of items per page.
+      - in: query
+        name: orderby
+        type: string
+        required: false
+        default: "create_time"
+        description: Field to order by.
+      - in: query
+        name: desc
+        type: boolean
+        required: false
+        default: true
+        description: Order in descending.
+      - in: header
+        name: Authorization
+        type: string
+        required: true
+        description: Bearer token for authentication.
+    responses:
+      200:
+        description: Successful operation.
+        schema:
+          type: array
+          items:
+            type: object
+    """
     id = request.args.get("id")
     name = request.args.get("name")
+    kbs = KnowledgebaseService.query(id=id, name=name, status=1)
     if not kbs:
         return get_error_data_result(retmsg="The dataset doesn't exist")
     page_number = int(request.args.get("page", 1))
     items_per_page = int(request.args.get("page_size", 1024))
     orderby = request.args.get("orderby", "create_time")
+    if request.args.get("desc") == "False" or request.args.get("desc") == "false":
         desc = False
     else:
         desc = True
     tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
     kbs = KnowledgebaseService.get_list(
+        [m["tenant_id"] for m in tenants],
+        tenant_id,
+        page_number,
+        items_per_page,
+        orderby,
+        desc,
+        id,
+        name,
+    )
     renamed_list = []
     for kb in kbs:
         key_mapping = {
             "chunk_num": "chunk_count",
             "doc_num": "document_count",
             "parser_id": "chunk_method",
+            "embd_id": "embedding_model",
         }
         renamed_data = {}
         for key, value in kb.items():
```
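With the docstrings above in place, the dataset endpoints show up in the Swagger UI and can also be called directly. A hedged usage sketch follows; the base URL, port, and API key are placeholders, and the `/api/v1` prefix follows the SDK blueprint mounting shown in `api/apps/__init__.py`.

```python
# Hypothetical client call for the documented POST /datasets endpoint.
# BASE_URL and API_KEY are placeholders, not values from this PR.
import requests

BASE_URL = "http://localhost:9380/api/v1"
API_KEY = "YOUR_RAGFLOW_API_KEY"

resp = requests.post(
    f"{BASE_URL}/datasets",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"name": "demo_dataset", "permission": "me", "chunk_method": "naive"},
)
print(resp.status_code, resp.json())
```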
api/apps/sdk/doc.py
CHANGED
@@ -39,7 +39,7 @@ from api.db.services.file2document_service import File2DocumentService
|
|
39 |
from api.db.services.file_service import FileService
|
40 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
41 |
from api.settings import RetCode, retrievaler
|
42 |
-
from api.utils.api_utils import construct_json_result,get_parser_config
|
43 |
from rag.nlp import search
|
44 |
from rag.utils import rmSpace
|
45 |
from rag.utils.es_conn import ELASTICSEARCH
|
@@ -49,36 +49,93 @@ import os
|
|
49 |
MAXIMUM_OF_UPLOADING_FILES = 256
|
50 |
|
51 |
|
52 |
-
|
53 |
-
@manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
|
54 |
@token_required
|
55 |
def upload(dataset_id, tenant_id):
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
return get_error_data_result(
|
58 |
-
retmsg=
|
59 |
-
|
|
|
60 |
for file_obj in file_objs:
|
61 |
-
if file_obj.filename ==
|
62 |
return get_result(
|
63 |
-
retmsg=
|
|
|
64 |
# total size
|
65 |
total_size = 0
|
66 |
for file_obj in file_objs:
|
67 |
file_obj.seek(0, os.SEEK_END)
|
68 |
total_size += file_obj.tell()
|
69 |
file_obj.seek(0)
|
70 |
-
MAX_TOTAL_FILE_SIZE=10*1024*1024
|
71 |
if total_size > MAX_TOTAL_FILE_SIZE:
|
72 |
return get_result(
|
73 |
-
retmsg=f
|
74 |
-
retcode=RetCode.ARGUMENT_ERROR
|
|
|
75 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
76 |
if not e:
|
77 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
78 |
-
err, files= FileService.upload_document(kb, file_objs, tenant_id)
|
79 |
if err:
|
80 |
-
return get_result(
|
81 |
-
retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
82 |
# rename key's name
|
83 |
renamed_doc_list = []
|
84 |
for file in files:
|
@@ -87,7 +144,7 @@ def upload(dataset_id, tenant_id):
|
|
87 |
"chunk_num": "chunk_count",
|
88 |
"kb_id": "dataset_id",
|
89 |
"token_num": "token_count",
|
90 |
-
"parser_id": "chunk_method"
|
91 |
}
|
92 |
renamed_doc = {}
|
93 |
for key, value in doc.items():
|
@@ -98,9 +155,54 @@ def upload(dataset_id, tenant_id):
|
|
98 |
return get_result(data=renamed_doc_list)
|
99 |
|
100 |
|
101 |
-
@manager.route(
|
102 |
@token_required
|
103 |
def update_doc(tenant_id, dataset_id, document_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
req = request.json
|
105 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
106 |
return get_error_data_result(retmsg="You don't own the dataset.")
|
@@ -115,20 +217,25 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
115 |
if req["token_count"] != doc.token_num:
|
116 |
return get_error_data_result(retmsg="Can't change `token_count`.")
|
117 |
if "progress" in req:
|
118 |
-
if req[
|
119 |
return get_error_data_result(retmsg="Can't change `progress`.")
|
120 |
|
121 |
if "name" in req and req["name"] != doc.name:
|
122 |
-
if
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
125 |
if d.name == req["name"]:
|
126 |
return get_error_data_result(
|
127 |
-
retmsg="Duplicated document name in the same dataset."
|
128 |
-
|
129 |
-
|
130 |
-
return get_error_data_result(
|
131 |
-
retmsg="Database error (Document rename)!")
|
132 |
|
133 |
informs = File2DocumentService.get_by_document_id(document_id)
|
134 |
if informs:
|
@@ -137,77 +244,231 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
137 |
if "parser_config" in req:
|
138 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
139 |
if "chunk_method" in req:
|
140 |
-
valid_chunk_method = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
if req.get("chunk_method") not in valid_chunk_method:
|
142 |
-
return get_error_data_result(
|
|
|
|
|
143 |
if doc.parser_id.lower() == req["chunk_method"].lower():
|
144 |
-
|
145 |
|
146 |
-
if doc.type == FileType.VISUAL or re.search(
|
147 |
-
r"\.(ppt|pptx|pages)$", doc.name):
|
148 |
return get_error_data_result(retmsg="Not supported yet!")
|
149 |
|
150 |
-
e = DocumentService.update_by_id(
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
if not e:
|
154 |
return get_error_data_result(retmsg="Document not found!")
|
155 |
-
req["parser_config"] = get_parser_config(
|
|
|
|
|
156 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
157 |
if doc.token_num > 0:
|
158 |
-
e = DocumentService.increment_chunk_num(
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
160 |
if not e:
|
161 |
return get_error_data_result(retmsg="Document not found!")
|
162 |
ELASTICSEARCH.deleteByQuery(
|
163 |
-
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)
|
|
|
164 |
|
165 |
return get_result()
|
166 |
|
167 |
|
168 |
-
@manager.route(
|
169 |
@token_required
|
170 |
def download(tenant_id, dataset_id, document_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
172 |
-
return get_error_data_result(retmsg=f
|
173 |
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
174 |
if not doc:
|
175 |
-
return get_error_data_result(
|
|
|
|
|
176 |
# The process of downloading
|
177 |
-
doc_id, doc_location = File2DocumentService.get_storage_address(
|
|
|
|
|
178 |
file_stream = STORAGE_IMPL.get(doc_id, doc_location)
|
179 |
if not file_stream:
|
180 |
-
return construct_json_result(
|
|
|
|
|
181 |
file = BytesIO(file_stream)
|
182 |
# Use send_file with a proper filename and MIME type
|
183 |
return send_file(
|
184 |
file,
|
185 |
as_attachment=True,
|
186 |
download_name=doc[0].name,
|
187 |
-
mimetype=
|
188 |
)
|
189 |
|
190 |
|
191 |
-
@manager.route(
|
192 |
@token_required
|
193 |
def list_docs(dataset_id, tenant_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
195 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
196 |
id = request.args.get("id")
|
197 |
name = request.args.get("name")
|
198 |
-
if not DocumentService.query(id=id,kb_id=dataset_id):
|
199 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
200 |
-
if not DocumentService.query(name=name,kb_id=dataset_id):
|
201 |
return get_error_data_result(retmsg=f"You don't own the document {name}.")
|
202 |
offset = int(request.args.get("offset", 1))
|
203 |
-
keywords = request.args.get("keywords","")
|
204 |
limit = int(request.args.get("limit", 1024))
|
205 |
orderby = request.args.get("orderby", "create_time")
|
206 |
if request.args.get("desc") == "False":
|
207 |
desc = False
|
208 |
else:
|
209 |
desc = True
|
210 |
-
docs, tol = DocumentService.get_list(
|
|
|
|
|
211 |
|
212 |
# rename key's name
|
213 |
renamed_doc_list = []
|
@@ -216,42 +477,80 @@ def list_docs(dataset_id, tenant_id):
|
|
216 |
"chunk_num": "chunk_count",
|
217 |
"kb_id": "dataset_id",
|
218 |
"token_num": "token_count",
|
219 |
-
"parser_id": "chunk_method"
|
220 |
}
|
221 |
run_mapping = {
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
}
|
228 |
renamed_doc = {}
|
229 |
for key, value in doc.items():
|
|
|
|
|
230 |
new_key = key_mapping.get(key, key)
|
231 |
renamed_doc[new_key] = value
|
232 |
-
if key =="run":
|
233 |
-
renamed_doc["run"]=run_mapping.get(value)
|
234 |
renamed_doc_list.append(renamed_doc)
|
235 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
236 |
|
237 |
|
238 |
-
@manager.route(
|
239 |
@token_required
|
240 |
-
def delete(tenant_id,dataset_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
242 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
243 |
req = request.json
|
244 |
if not req:
|
245 |
-
doc_ids=None
|
246 |
else:
|
247 |
-
doc_ids=req.get("ids")
|
248 |
if not doc_ids:
|
249 |
doc_list = []
|
250 |
-
docs=DocumentService.query(kb_id=dataset_id)
|
251 |
for doc in docs:
|
252 |
doc_list.append(doc.id)
|
253 |
else:
|
254 |
-
doc_list=doc_ids
|
255 |
root_folder = FileService.get_root_folder(tenant_id)
|
256 |
pf_id = root_folder["id"]
|
257 |
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
@@ -269,10 +568,16 @@ def delete(tenant_id,dataset_id):
|
|
269 |
|
270 |
if not DocumentService.remove_document(doc, tenant_id):
|
271 |
return get_error_data_result(
|
272 |
-
retmsg="Database error (Document removal)!"
|
|
|
273 |
|
274 |
f2d = File2DocumentService.get_by_document_id(doc_id)
|
275 |
-
FileService.filter_delete(
|
|
|
|
|
|
|
|
|
|
|
276 |
File2DocumentService.delete_by_document_id(doc_id)
|
277 |
|
278 |
STORAGE_IMPL.rm(b, n)
|
@@ -285,25 +590,66 @@ def delete(tenant_id,dataset_id):
|
|
285 |
return get_result()
|
286 |
|
287 |
|
288 |
-
@manager.route(
|
289 |
@token_required
|
290 |
-
def parse(tenant_id,dataset_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
292 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
293 |
req = request.json
|
294 |
if not req.get("document_ids"):
|
295 |
return get_error_data_result("`document_ids` is required")
|
296 |
for id in req["document_ids"]:
|
297 |
-
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
298 |
if not doc:
|
299 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
|
|
|
|
|
|
|
|
300 |
info = {"run": "1", "progress": 0}
|
301 |
info["progress_msg"] = ""
|
302 |
info["chunk_num"] = 0
|
303 |
info["token_num"] = 0
|
304 |
DocumentService.update_by_id(id, info)
|
305 |
ELASTICSEARCH.deleteByQuery(
|
306 |
-
Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
|
|
|
307 |
TaskService.filter_delete([Task.doc_id == id])
|
308 |
e, doc = DocumentService.get_by_id(id)
|
309 |
doc = doc.to_dict()
|
@@ -312,9 +658,46 @@ def parse(tenant_id,dataset_id):
|
|
312 |
queue_tasks(doc, bucket, name)
|
313 |
return get_result()
|
314 |
|
315 |
-
|
|
|
316 |
@token_required
|
317 |
-
def stop_parsing(tenant_id,dataset_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
319 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
320 |
req = request.json
|
@@ -325,46 +708,125 @@ def stop_parsing(tenant_id,dataset_id):
|
|
325 |
if not doc:
|
326 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
327 |
if int(doc[0].progress) == 1 or int(doc[0].progress) == 0:
|
328 |
-
return get_error_data_result(
|
329 |
-
|
|
|
|
|
330 |
DocumentService.update_by_id(id, info)
|
331 |
ELASTICSEARCH.deleteByQuery(
|
332 |
-
Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
|
|
|
333 |
return get_result()
|
334 |
|
335 |
|
336 |
-
@manager.route(
|
337 |
@token_required
|
338 |
-
def list_chunks(tenant_id,dataset_id,document_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
340 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
341 |
-
doc=DocumentService.query(id=document_id, kb_id=dataset_id)
|
342 |
if not doc:
|
343 |
-
return get_error_data_result(
|
344 |
-
|
|
|
|
|
345 |
req = request.args
|
346 |
doc_id = document_id
|
347 |
page = int(req.get("offset", 1))
|
348 |
size = int(req.get("limit", 30))
|
349 |
question = req.get("keywords", "")
|
350 |
query = {
|
351 |
-
"doc_ids": [doc_id],
|
|
|
|
|
|
|
|
|
352 |
}
|
353 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
354 |
key_mapping = {
|
355 |
"chunk_num": "chunk_count",
|
356 |
"kb_id": "dataset_id",
|
357 |
"token_num": "token_count",
|
358 |
-
"parser_id": "chunk_method"
|
359 |
}
|
360 |
run_mapping = {
|
361 |
"0": "UNSTART",
|
362 |
"1": "RUNNING",
|
363 |
"2": "CANCEL",
|
364 |
"3": "DONE",
|
365 |
-
"4": "FAIL"
|
366 |
}
|
367 |
-
doc=doc.to_dict()
|
368 |
renamed_doc = {}
|
369 |
for key, value in doc.items():
|
370 |
new_key = key_mapping.get(key, key)
|
@@ -377,21 +839,30 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
377 |
for id in sres.ids:
|
378 |
d = {
|
379 |
"chunk_id": id,
|
380 |
-
"content_with_weight":
|
381 |
-
id]
|
382 |
-
|
|
|
|
|
383 |
"doc_id": sres.field[id]["doc_id"],
|
384 |
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
385 |
"important_kwd": sres.field[id].get("important_kwd", []),
|
386 |
"img_id": sres.field[id].get("img_id", ""),
|
387 |
"available_int": sres.field[id].get("available_int", 1),
|
388 |
-
"positions": sres.field[id].get("position_int", "").split("\t")
|
389 |
}
|
390 |
if len(d["positions"]) % 5 == 0:
|
391 |
poss = []
|
392 |
for i in range(0, len(d["positions"]), 5):
|
393 |
-
poss.append(
|
394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
d["positions"] = poss
|
396 |
|
397 |
origin_chunks.append(d)
|
@@ -411,7 +882,7 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
411 |
"doc_id": "document_id",
|
412 |
"important_kwd": "important_keywords",
|
413 |
"img_id": "image_id",
|
414 |
-
"available_int":"available"
|
415 |
}
|
416 |
renamed_chunk = {}
|
417 |
for key, value in chunk.items():
|
@@ -425,31 +896,104 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
425 |
return get_result(data=res)
|
426 |
|
427 |
|
428 |
-
|
429 |
-
|
|
|
430 |
@token_required
|
431 |
-
def add_chunk(tenant_id,dataset_id,document_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
433 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
434 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
435 |
if not doc:
|
436 |
-
return get_error_data_result(
|
|
|
|
|
437 |
doc = doc[0]
|
438 |
req = request.json
|
439 |
if not req.get("content"):
|
440 |
return get_error_data_result(retmsg="`content` is required")
|
441 |
if "important_keywords" in req:
|
442 |
if type(req["important_keywords"]) != list:
|
443 |
-
return get_error_data_result(
|
|
|
|
|
444 |
md5 = hashlib.md5()
|
445 |
md5.update((req["content"] + document_id).encode("utf-8"))
|
446 |
|
447 |
chunk_id = md5.hexdigest()
|
448 |
-
d = {
|
449 |
-
|
|
|
|
|
|
|
450 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
451 |
d["important_kwd"] = req.get("important_keywords", [])
|
452 |
-
d["important_tks"] = rag_tokenizer.tokenize(
|
|
|
|
|
453 |
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
|
454 |
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
|
455 |
d["kb_id"] = [doc.kb_id]
|
@@ -457,17 +1001,17 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
457 |
d["doc_id"] = doc.id
|
458 |
embd_id = DocumentService.get_embd_id(document_id)
|
459 |
embd_mdl = TenantLLMService.model_instance(
|
460 |
-
tenant_id, LLMType.EMBEDDING.value, embd_id
|
461 |
-
|
|
|
462 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
463 |
v = 0.1 * v[0] + 0.9 * v[1]
|
464 |
d["q_%d_vec" % len(v)] = v.tolist()
|
465 |
ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
|
466 |
|
467 |
-
DocumentService.increment_chunk_num(
|
468 |
-
doc.id, doc.kb_id, c, 1, 0)
|
469 |
d["chunk_id"] = chunk_id
|
470 |
-
d["kb_id"]=doc.kb_id
|
471 |
# rename keys
|
472 |
key_mapping = {
|
473 |
"chunk_id": "id",
|
@@ -477,7 +1021,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
477 |
"kb_id": "dataset_id",
|
478 |
"create_timestamp_flt": "create_timestamp",
|
479 |
"create_time": "create_time",
|
480 |
-
"document_keyword": "document"
|
481 |
}
|
482 |
renamed_chunk = {}
|
483 |
for key, value in d.items():
|
@@ -488,32 +1032,79 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
488 |
# return get_result(data={"chunk_id": chunk_id})
|
489 |
|
490 |
|
491 |
-
@manager.route(
|
|
|
|
|
492 |
@token_required
|
493 |
-
def rm_chunk(tenant_id,dataset_id,document_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
494 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
495 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
496 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
497 |
if not doc:
|
498 |
-
return get_error_data_result(
|
|
|
|
|
499 |
doc = doc[0]
|
500 |
req = request.json
|
501 |
-
|
502 |
-
|
|
|
503 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
504 |
if not req:
|
505 |
-
chunk_ids=None
|
506 |
else:
|
507 |
-
chunk_ids=req.get("chunk_ids")
|
508 |
if not chunk_ids:
|
509 |
-
chunk_list=sres.ids
|
510 |
else:
|
511 |
-
chunk_list=chunk_ids
|
512 |
for chunk_id in chunk_list:
|
513 |
if chunk_id not in sres.ids:
|
514 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
515 |
if not ELASTICSEARCH.deleteByQuery(
|
516 |
-
|
|
|
517 |
return get_error_data_result(retmsg="Index updating failure")
|
518 |
deleted_chunk_ids = chunk_list
|
519 |
chunk_number = len(deleted_chunk_ids)
|
@@ -521,37 +1112,92 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
|
521 |
return get_result()
|
522 |
|
523 |
|
524 |
-
|
525 |
-
|
|
|
526 |
@token_required
|
527 |
-
def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
try:
|
529 |
-
res = ELASTICSEARCH.get(
|
530 |
-
chunk_id, search.index_name(
|
531 |
-
tenant_id))
|
532 |
except Exception as e:
|
533 |
return get_error_data_result(f"Can't find this chunk {chunk_id}")
|
534 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
535 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
536 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
537 |
if not doc:
|
538 |
-
return get_error_data_result(
|
|
|
|
|
539 |
doc = doc[0]
|
540 |
query = {
|
541 |
-
"doc_ids": [document_id],
|
|
|
|
|
|
|
|
|
542 |
}
|
543 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
544 |
if chunk_id not in sres.ids:
|
545 |
return get_error_data_result(f"You don't own the chunk {chunk_id}")
|
546 |
req = request.json
|
547 |
-
content=res["_source"].get("content_with_weight")
|
548 |
-
d = {
|
549 |
-
"id": chunk_id,
|
550 |
-
"content_with_weight": req.get("content",content)}
|
551 |
d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
|
552 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
553 |
if "important_keywords" in req:
|
554 |
-
if not isinstance(req["important_keywords"],list):
|
555 |
return get_error_data_result("`important_keywords` should be a list")
|
556 |
d["important_kwd"] = req.get("important_keywords")
|
557 |
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
@@ -559,18 +1205,18 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
|
|
559 |
d["available_int"] = int(req["available"])
|
560 |
embd_id = DocumentService.get_embd_id(document_id)
|
561 |
embd_mdl = TenantLLMService.model_instance(
|
562 |
-
tenant_id, LLMType.EMBEDDING.value, embd_id
|
|
|
563 |
if doc.parser_id == ParserType.QA:
|
564 |
-
arr = [
|
565 |
-
t for t in re.split(
|
566 |
-
r"[\n\t]",
|
567 |
-
d["content_with_weight"]) if len(t) > 1]
|
568 |
if len(arr) != 2:
|
569 |
return get_error_data_result(
|
570 |
-
retmsg="Q&A must be separated by TAB/ENTER key."
|
|
|
571 |
q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
|
572 |
-
d = beAdoc(
|
573 |
-
[rag_tokenizer.is_chinese(t) for t in q + a])
|
|
|
574 |
|
575 |
v, c = embd_mdl.encode([doc.name, d["content_with_weight"]])
|
576 |
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
|
@@ -579,41 +1225,120 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
|
|
579 |
return get_result()
|
580 |
|
581 |
|
582 |
-
|
583 |
-
@manager.route('/retrieval', methods=['POST'])
|
584 |
@token_required
|
585 |
def retrieval_test(tenant_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
586 |
req = request.json
|
587 |
if not req.get("dataset_ids"):
|
588 |
return get_error_data_result("`dataset_ids` is required.")
|
589 |
kb_ids = req["dataset_ids"]
|
590 |
-
if not isinstance(kb_ids,list):
|
591 |
return get_error_data_result("`dataset_ids` should be a list")
|
592 |
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
593 |
for id in kb_ids:
|
594 |
-
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
595 |
return get_error_data_result(f"You don't own the dataset {id}.")
|
596 |
embd_nms = list(set([kb.embd_id for kb in kbs]))
|
597 |
if len(embd_nms) != 1:
|
598 |
return get_result(
|
599 |
retmsg='Datasets use different embedding models."',
|
600 |
-
retcode=RetCode.AUTHENTICATION_ERROR
|
|
|
601 |
if "question" not in req:
|
602 |
return get_error_data_result("`question` is required.")
|
603 |
page = int(req.get("offset", 1))
|
604 |
size = int(req.get("limit", 1024))
|
605 |
question = req["question"]
|
606 |
doc_ids = req.get("document_ids", [])
|
607 |
-
if not isinstance(doc_ids,list):
|
608 |
return get_error_data_result("`documents` should be a list")
|
609 |
-
doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
|
610 |
for doc_id in doc_ids:
|
611 |
if doc_id not in doc_ids_list:
|
612 |
-
return get_error_data_result(
|
|
|
|
|
613 |
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
614 |
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
615 |
top = int(req.get("top_k", 1024))
|
616 |
-
if req.get("highlight")=="False" or
|
617 |
highlight = False
|
618 |
else:
|
619 |
highlight = True
|
@@ -622,21 +1347,34 @@ def retrieval_test(tenant_id):
|
|
622 |
if not e:
|
623 |
return get_error_data_result(retmsg="Dataset not found!")
|
624 |
embd_mdl = TenantLLMService.model_instance(
|
625 |
-
kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id
|
|
|
626 |
|
627 |
rerank_mdl = None
|
628 |
if req.get("rerank_id"):
|
629 |
rerank_mdl = TenantLLMService.model_instance(
|
630 |
-
kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]
|
|
|
631 |
|
632 |
if req.get("keyword", False):
|
633 |
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
|
634 |
question += keyword_extraction(chat_mdl, question)
|
635 |
|
636 |
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
|
637 |
-
ranks = retr.retrieval(
|
638 |
-
|
639 |
-
|
|
|
|
|
640 |
for c in ranks["chunks"]:
|
641 |
if "vector" in c:
|
642 |
del c["vector"]
|
@@ -649,7 +1387,7 @@ def retrieval_test(tenant_id):
|
|
649 |
"content_with_weight": "content",
|
650 |
"doc_id": "document_id",
|
651 |
"important_kwd": "important_keywords",
|
652 |
-
"docnm_kwd": "document_keyword"
|
653 |
}
|
654 |
rename_chunk = {}
|
655 |
for key, value in chunk.items():
|
@@ -660,6 +1398,8 @@ def retrieval_test(tenant_id):
|
|
660 |
return get_result(data=ranks)
|
661 |
except Exception as e:
|
662 |
if str(e).find("not_found") > 0:
|
663 |
-
return get_result(
|
664 |
-
|
665 |
-
|
|
|
|
|
|
39 |
from api.db.services.file_service import FileService
|
40 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
41 |
from api.settings import RetCode, retrievaler
|
42 |
+
from api.utils.api_utils import construct_json_result, get_parser_config
|
43 |
from rag.nlp import search
|
44 |
from rag.utils import rmSpace
|
45 |
from rag.utils.es_conn import ELASTICSEARCH
|
|
|
49 |
MAXIMUM_OF_UPLOADING_FILES = 256
|
50 |
|
51 |
|
52 |
+
@manager.route("/datasets/<dataset_id>/documents", methods=["POST"])
|
|
|
53 |
@token_required
|
54 |
def upload(dataset_id, tenant_id):
|
55 |
+
"""
|
56 |
+
Upload documents to a dataset.
|
57 |
+
---
|
58 |
+
tags:
|
59 |
+
- Documents
|
60 |
+
security:
|
61 |
+
- ApiKeyAuth: []
|
62 |
+
parameters:
|
63 |
+
- in: path
|
64 |
+
name: dataset_id
|
65 |
+
type: string
|
66 |
+
required: true
|
67 |
+
description: ID of the dataset.
|
68 |
+
- in: header
|
69 |
+
name: Authorization
|
70 |
+
type: string
|
71 |
+
required: true
|
72 |
+
description: Bearer token for authentication.
|
73 |
+
- in: formData
|
74 |
+
name: file
|
75 |
+
type: file
|
76 |
+
required: true
|
77 |
+
description: Document files to upload.
|
78 |
+
responses:
|
79 |
+
200:
|
80 |
+
description: Successfully uploaded documents.
|
81 |
+
schema:
|
82 |
+
type: object
|
83 |
+
properties:
|
84 |
+
data:
|
85 |
+
type: array
|
86 |
+
items:
|
87 |
+
type: object
|
88 |
+
properties:
|
89 |
+
id:
|
90 |
+
type: string
|
91 |
+
description: Document ID.
|
92 |
+
name:
|
93 |
+
type: string
|
94 |
+
description: Document name.
|
95 |
+
chunk_count:
|
96 |
+
type: integer
|
97 |
+
description: Number of chunks.
|
98 |
+
token_count:
|
99 |
+
type: integer
|
100 |
+
description: Number of tokens.
|
101 |
+
dataset_id:
|
102 |
+
type: string
|
103 |
+
description: ID of the dataset.
|
104 |
+
chunk_method:
|
105 |
+
type: string
|
106 |
+
description: Chunking method used.
|
107 |
+
run:
|
108 |
+
type: string
|
109 |
+
description: Processing status.
|
110 |
+
"""
|
111 |
+
if "file" not in request.files:
|
112 |
return get_error_data_result(
|
113 |
+
retmsg="No file part!", retcode=RetCode.ARGUMENT_ERROR
|
114 |
+
)
|
115 |
+
file_objs = request.files.getlist("file")
|
116 |
for file_obj in file_objs:
|
117 |
+
if file_obj.filename == "":
|
118 |
return get_result(
|
119 |
+
retmsg="No file selected!", retcode=RetCode.ARGUMENT_ERROR
|
120 |
+
)
|
121 |
# total size
|
122 |
total_size = 0
|
123 |
for file_obj in file_objs:
|
124 |
file_obj.seek(0, os.SEEK_END)
|
125 |
total_size += file_obj.tell()
|
126 |
file_obj.seek(0)
|
127 |
+
MAX_TOTAL_FILE_SIZE = 10 * 1024 * 1024
|
128 |
if total_size > MAX_TOTAL_FILE_SIZE:
|
129 |
return get_result(
|
130 |
+
retmsg=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)",
|
131 |
+
retcode=RetCode.ARGUMENT_ERROR,
|
132 |
+
)
|
133 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
134 |
if not e:
|
135 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
136 |
+
err, files = FileService.upload_document(kb, file_objs, tenant_id)
|
137 |
if err:
|
138 |
+
return get_result(retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
|
|
139 |
# rename key's name
|
140 |
renamed_doc_list = []
|
141 |
for file in files:
|
|
|
144 |
"chunk_num": "chunk_count",
|
145 |
"kb_id": "dataset_id",
|
146 |
"token_num": "token_count",
|
147 |
+
"parser_id": "chunk_method",
|
148 |
}
|
149 |
renamed_doc = {}
|
150 |
for key, value in doc.items():
|
|
|
155 |
return get_result(data=renamed_doc_list)
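
For orientation, a minimal client-side sketch of the upload route documented above (not part of this diff): the base URL, the /api/v1 prefix, the RAGFLOW_API_KEY variable, the dataset ID and the file name are assumptions.

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed host and prefix, adjust to your deployment
API_KEY = os.environ["RAGFLOW_API_KEY"]     # assumed env var holding a valid Bearer token
DATASET_ID = "<dataset_id>"                 # hypothetical dataset ID

with open("manual.pdf", "rb") as fh:
    resp = requests.post(
        f"{BASE_URL}/datasets/{DATASET_ID}/documents",
        headers={"Authorization": f"Bearer {API_KEY}"},
        files={"file": ("manual.pdf", fh)},  # multipart field must be named "file"; total size is capped at 10 MB
    )
print(resp.json())
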
|
156 |
|
157 |
|
158 |
+
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"])
|
159 |
@token_required
|
160 |
def update_doc(tenant_id, dataset_id, document_id):
|
161 |
+
"""
|
162 |
+
Update a document within a dataset.
|
163 |
+
---
|
164 |
+
tags:
|
165 |
+
- Documents
|
166 |
+
security:
|
167 |
+
- ApiKeyAuth: []
|
168 |
+
parameters:
|
169 |
+
- in: path
|
170 |
+
name: dataset_id
|
171 |
+
type: string
|
172 |
+
required: true
|
173 |
+
description: ID of the dataset.
|
174 |
+
- in: path
|
175 |
+
name: document_id
|
176 |
+
type: string
|
177 |
+
required: true
|
178 |
+
description: ID of the document to update.
|
179 |
+
- in: header
|
180 |
+
name: Authorization
|
181 |
+
type: string
|
182 |
+
required: true
|
183 |
+
description: Bearer token for authentication.
|
184 |
+
- in: body
|
185 |
+
name: body
|
186 |
+
description: Document update parameters.
|
187 |
+
required: true
|
188 |
+
schema:
|
189 |
+
type: object
|
190 |
+
properties:
|
191 |
+
name:
|
192 |
+
type: string
|
193 |
+
description: New name of the document.
|
194 |
+
parser_config:
|
195 |
+
type: object
|
196 |
+
description: Parser configuration.
|
197 |
+
chunk_method:
|
198 |
+
type: string
|
199 |
+
description: Chunking method.
|
200 |
+
responses:
|
201 |
+
200:
|
202 |
+
description: Document updated successfully.
|
203 |
+
schema:
|
204 |
+
type: object
|
205 |
+
"""
|
206 |
req = request.json
|
207 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
208 |
return get_error_data_result(retmsg="You don't own the dataset.")
|
|
|
217 |
if req["token_count"] != doc.token_num:
|
218 |
return get_error_data_result(retmsg="Can't change `token_count`.")
|
219 |
if "progress" in req:
|
220 |
+
if req["progress"] != doc.progress:
|
221 |
return get_error_data_result(retmsg="Can't change `progress`.")
|
222 |
|
223 |
if "name" in req and req["name"] != doc.name:
|
224 |
+
if (
|
225 |
+
pathlib.Path(req["name"].lower()).suffix
|
226 |
+
!= pathlib.Path(doc.name.lower()).suffix
|
227 |
+
):
|
228 |
+
return get_result(
|
229 |
+
retmsg="The extension of file can't be changed",
|
230 |
+
retcode=RetCode.ARGUMENT_ERROR,
|
231 |
+
)
|
232 |
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
233 |
if d.name == req["name"]:
|
234 |
return get_error_data_result(
|
235 |
+
retmsg="Duplicated document name in the same dataset."
|
236 |
+
)
|
237 |
+
if not DocumentService.update_by_id(document_id, {"name": req["name"]}):
|
238 |
+
return get_error_data_result(retmsg="Database error (Document rename)!")
|
|
|
239 |
|
240 |
informs = File2DocumentService.get_by_document_id(document_id)
|
241 |
if informs:
|
|
|
244 |
if "parser_config" in req:
|
245 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
246 |
if "chunk_method" in req:
|
247 |
+
valid_chunk_method = {
|
248 |
+
"naive",
|
249 |
+
"manual",
|
250 |
+
"qa",
|
251 |
+
"table",
|
252 |
+
"paper",
|
253 |
+
"book",
|
254 |
+
"laws",
|
255 |
+
"presentation",
|
256 |
+
"picture",
|
257 |
+
"one",
|
258 |
+
"knowledge_graph",
|
259 |
+
"email",
|
260 |
+
}
|
261 |
if req.get("chunk_method") not in valid_chunk_method:
|
262 |
+
return get_error_data_result(
|
263 |
+
f"`chunk_method` {req['chunk_method']} doesn't exist"
|
264 |
+
)
|
265 |
if doc.parser_id.lower() == req["chunk_method"].lower():
|
266 |
+
return get_result()
|
267 |
|
268 |
+
if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
|
|
|
269 |
return get_error_data_result(retmsg="Not supported yet!")
|
270 |
|
271 |
+
e = DocumentService.update_by_id(
|
272 |
+
doc.id,
|
273 |
+
{
|
274 |
+
"parser_id": req["chunk_method"],
|
275 |
+
"progress": 0,
|
276 |
+
"progress_msg": "",
|
277 |
+
"run": TaskStatus.UNSTART.value,
|
278 |
+
},
|
279 |
+
)
|
280 |
if not e:
|
281 |
return get_error_data_result(retmsg="Document not found!")
|
282 |
+
req["parser_config"] = get_parser_config(
|
283 |
+
req["chunk_method"], req.get("parser_config")
|
284 |
+
)
|
285 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
286 |
if doc.token_num > 0:
|
287 |
+
e = DocumentService.increment_chunk_num(
|
288 |
+
doc.id,
|
289 |
+
doc.kb_id,
|
290 |
+
doc.token_num * -1,
|
291 |
+
doc.chunk_num * -1,
|
292 |
+
doc.process_duation * -1,
|
293 |
+
)
|
294 |
if not e:
|
295 |
return get_error_data_result(retmsg="Document not found!")
|
296 |
ELASTICSEARCH.deleteByQuery(
|
297 |
+
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)
|
298 |
+
)
|
299 |
|
300 |
return get_result()
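
A hedged sketch of calling the PUT route above to rename a document and switch its chunk method; the URL prefix, token variable and IDs are placeholders, not values from this commit.

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}  # assumed env var

resp = requests.put(
    f"{BASE_URL}/datasets/<dataset_id>/documents/<document_id>",  # substitute real IDs
    headers=HEADERS,
    json={"name": "manual_v2.pdf", "chunk_method": "naive"},  # the file extension must stay the same
)
print(resp.json())
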
|
301 |
|
302 |
|
303 |
+
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"])
|
304 |
@token_required
|
305 |
def download(tenant_id, dataset_id, document_id):
|
306 |
+
"""
|
307 |
+
Download a document from a dataset.
|
308 |
+
---
|
309 |
+
tags:
|
310 |
+
- Documents
|
311 |
+
security:
|
312 |
+
- ApiKeyAuth: []
|
313 |
+
produces:
|
314 |
+
- application/octet-stream
|
315 |
+
parameters:
|
316 |
+
- in: path
|
317 |
+
name: dataset_id
|
318 |
+
type: string
|
319 |
+
required: true
|
320 |
+
description: ID of the dataset.
|
321 |
+
- in: path
|
322 |
+
name: document_id
|
323 |
+
type: string
|
324 |
+
required: true
|
325 |
+
description: ID of the document to download.
|
326 |
+
- in: header
|
327 |
+
name: Authorization
|
328 |
+
type: string
|
329 |
+
required: true
|
330 |
+
description: Bearer token for authentication.
|
331 |
+
responses:
|
332 |
+
200:
|
333 |
+
description: Document file stream.
|
334 |
+
schema:
|
335 |
+
type: file
|
336 |
+
400:
|
337 |
+
description: Error message.
|
338 |
+
schema:
|
339 |
+
type: object
|
340 |
+
"""
|
341 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
342 |
+
return get_error_data_result(retmsg=f"You do not own the dataset {dataset_id}.")
|
343 |
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
344 |
if not doc:
|
345 |
+
return get_error_data_result(
|
346 |
+
retmsg=f"The dataset not own the document {document_id}."
|
347 |
+
)
|
348 |
# The process of downloading
|
349 |
+
doc_id, doc_location = File2DocumentService.get_storage_address(
|
350 |
+
doc_id=document_id
|
351 |
+
) # minio address
|
352 |
file_stream = STORAGE_IMPL.get(doc_id, doc_location)
|
353 |
if not file_stream:
|
354 |
+
return construct_json_result(
|
355 |
+
message="This file is empty.", code=RetCode.DATA_ERROR
|
356 |
+
)
|
357 |
file = BytesIO(file_stream)
|
358 |
# Use send_file with a proper filename and MIME type
|
359 |
return send_file(
|
360 |
file,
|
361 |
as_attachment=True,
|
362 |
download_name=doc[0].name,
|
363 |
+
mimetype="application/octet-stream", # Set a default MIME type
|
364 |
)
|
365 |
|
366 |
|
367 |
+
@manager.route("/datasets/<dataset_id>/documents", methods=["GET"])
|
368 |
@token_required
|
369 |
def list_docs(dataset_id, tenant_id):
|
370 |
+
"""
|
371 |
+
List documents in a dataset.
|
372 |
+
---
|
373 |
+
tags:
|
374 |
+
- Documents
|
375 |
+
security:
|
376 |
+
- ApiKeyAuth: []
|
377 |
+
parameters:
|
378 |
+
- in: path
|
379 |
+
name: dataset_id
|
380 |
+
type: string
|
381 |
+
required: true
|
382 |
+
description: ID of the dataset.
|
383 |
+
- in: query
|
384 |
+
name: id
|
385 |
+
type: string
|
386 |
+
required: false
|
387 |
+
description: Filter by document ID.
|
388 |
+
- in: query
|
389 |
+
name: offset
|
390 |
+
type: integer
|
391 |
+
required: false
|
392 |
+
default: 1
|
393 |
+
description: Page number.
|
394 |
+
- in: query
|
395 |
+
name: limit
|
396 |
+
type: integer
|
397 |
+
required: false
|
398 |
+
default: 1024
|
399 |
+
description: Number of items per page.
|
400 |
+
- in: query
|
401 |
+
name: orderby
|
402 |
+
type: string
|
403 |
+
required: false
|
404 |
+
default: "create_time"
|
405 |
+
description: Field to order by.
|
406 |
+
- in: query
|
407 |
+
name: desc
|
408 |
+
type: boolean
|
409 |
+
required: false
|
410 |
+
default: true
|
411 |
+
description: Order in descending.
|
412 |
+
- in: header
|
413 |
+
name: Authorization
|
414 |
+
type: string
|
415 |
+
required: true
|
416 |
+
description: Bearer token for authentication.
|
417 |
+
responses:
|
418 |
+
200:
|
419 |
+
description: List of documents.
|
420 |
+
schema:
|
421 |
+
type: object
|
422 |
+
properties:
|
423 |
+
total:
|
424 |
+
type: integer
|
425 |
+
description: Total number of documents.
|
426 |
+
docs:
|
427 |
+
type: array
|
428 |
+
items:
|
429 |
+
type: object
|
430 |
+
properties:
|
431 |
+
id:
|
432 |
+
type: string
|
433 |
+
description: Document ID.
|
434 |
+
name:
|
435 |
+
type: string
|
436 |
+
description: Document name.
|
437 |
+
chunk_count:
|
438 |
+
type: integer
|
439 |
+
description: Number of chunks.
|
440 |
+
token_count:
|
441 |
+
type: integer
|
442 |
+
description: Number of tokens.
|
443 |
+
dataset_id:
|
444 |
+
type: string
|
445 |
+
description: ID of the dataset.
|
446 |
+
chunk_method:
|
447 |
+
type: string
|
448 |
+
description: Chunking method used.
|
449 |
+
run:
|
450 |
+
type: string
|
451 |
+
description: Processing status.
|
452 |
+
"""
|
453 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
454 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
455 |
id = request.args.get("id")
|
456 |
name = request.args.get("name")
|
457 |
+
if not DocumentService.query(id=id, kb_id=dataset_id):
|
458 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
459 |
+
if not DocumentService.query(name=name, kb_id=dataset_id):
|
460 |
return get_error_data_result(retmsg=f"You don't own the document {name}.")
|
461 |
offset = int(request.args.get("offset", 1))
|
462 |
+
keywords = request.args.get("keywords", "")
|
463 |
limit = int(request.args.get("limit", 1024))
|
464 |
orderby = request.args.get("orderby", "create_time")
|
465 |
if request.args.get("desc") == "False":
|
466 |
desc = False
|
467 |
else:
|
468 |
desc = True
|
469 |
+
docs, tol = DocumentService.get_list(
|
470 |
+
dataset_id, offset, limit, orderby, desc, keywords, id, name
|
471 |
+
)
|
472 |
|
473 |
# rename key's name
|
474 |
renamed_doc_list = []
|
|
|
477 |
"chunk_num": "chunk_count",
|
478 |
"kb_id": "dataset_id",
|
479 |
"token_num": "token_count",
|
480 |
+
"parser_id": "chunk_method",
|
481 |
}
|
482 |
run_mapping = {
|
483 |
+
"0": "UNSTART",
|
484 |
+
"1": "RUNNING",
|
485 |
+
"2": "CANCEL",
|
486 |
+
"3": "DONE",
|
487 |
+
"4": "FAIL",
|
488 |
}
|
489 |
renamed_doc = {}
|
490 |
for key, value in doc.items():
|
491 |
+
if key == "run":
|
492 |
+
renamed_doc["run"] = run_mapping.get(str(value))
|
493 |
new_key = key_mapping.get(key, key)
|
494 |
renamed_doc[new_key] = value
|
495 |
+
if key == "run":
|
496 |
+
renamed_doc["run"] = run_mapping.get(value)
|
497 |
renamed_doc_list.append(renamed_doc)
|
498 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
499 |
|
500 |
|
501 |
+
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])
|
502 |
@token_required
|
503 |
+
def delete(tenant_id, dataset_id):
|
504 |
+
"""
|
505 |
+
Delete documents from a dataset.
|
506 |
+
---
|
507 |
+
tags:
|
508 |
+
- Documents
|
509 |
+
security:
|
510 |
+
- ApiKeyAuth: []
|
511 |
+
parameters:
|
512 |
+
- in: path
|
513 |
+
name: dataset_id
|
514 |
+
type: string
|
515 |
+
required: true
|
516 |
+
description: ID of the dataset.
|
517 |
+
- in: body
|
518 |
+
name: body
|
519 |
+
description: Document deletion parameters.
|
520 |
+
required: true
|
521 |
+
schema:
|
522 |
+
type: object
|
523 |
+
properties:
|
524 |
+
ids:
|
525 |
+
type: array
|
526 |
+
items:
|
527 |
+
type: string
|
528 |
+
description: List of document IDs to delete.
|
529 |
+
- in: header
|
530 |
+
name: Authorization
|
531 |
+
type: string
|
532 |
+
required: true
|
533 |
+
description: Bearer token for authentication.
|
534 |
+
responses:
|
535 |
+
200:
|
536 |
+
description: Documents deleted successfully.
|
537 |
+
schema:
|
538 |
+
type: object
|
539 |
+
"""
|
540 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
541 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
542 |
req = request.json
|
543 |
if not req:
|
544 |
+
doc_ids = None
|
545 |
else:
|
546 |
+
doc_ids = req.get("ids")
|
547 |
if not doc_ids:
|
548 |
doc_list = []
|
549 |
+
docs = DocumentService.query(kb_id=dataset_id)
|
550 |
for doc in docs:
|
551 |
doc_list.append(doc.id)
|
552 |
else:
|
553 |
+
doc_list = doc_ids
|
554 |
root_folder = FileService.get_root_folder(tenant_id)
|
555 |
pf_id = root_folder["id"]
|
556 |
FileService.init_knowledgebase_docs(pf_id, tenant_id)
|
|
|
568 |
|
569 |
if not DocumentService.remove_document(doc, tenant_id):
|
570 |
return get_error_data_result(
|
571 |
+
retmsg="Database error (Document removal)!"
|
572 |
+
)
|
573 |
|
574 |
f2d = File2DocumentService.get_by_document_id(doc_id)
|
575 |
+
FileService.filter_delete(
|
576 |
+
[
|
577 |
+
File.source_type == FileSource.KNOWLEDGEBASE,
|
578 |
+
File.id == f2d[0].file_id,
|
579 |
+
]
|
580 |
+
)
|
581 |
File2DocumentService.delete_by_document_id(doc_id)
|
582 |
|
583 |
STORAGE_IMPL.rm(b, n)
|
|
|
590 |
return get_result()
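
A sketch of the DELETE call documented above; per the handler, omitting `ids` removes every document in the dataset, so the example passes explicit IDs. URL prefix and token variable are assumptions.

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}

resp = requests.delete(
    f"{BASE_URL}/datasets/<dataset_id>/documents",
    headers=HEADERS,
    json={"ids": ["<document_id_1>", "<document_id_2>"]},  # omit "ids" to delete every document
)
print(resp.json())
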
|
591 |
|
592 |
|
593 |
+
@manager.route("/datasets/<dataset_id>/chunks", methods=["POST"])
|
594 |
@token_required
|
595 |
+
def parse(tenant_id, dataset_id):
|
596 |
+
"""
|
597 |
+
Start parsing documents into chunks.
|
598 |
+
---
|
599 |
+
tags:
|
600 |
+
- Chunks
|
601 |
+
security:
|
602 |
+
- ApiKeyAuth: []
|
603 |
+
parameters:
|
604 |
+
- in: path
|
605 |
+
name: dataset_id
|
606 |
+
type: string
|
607 |
+
required: true
|
608 |
+
description: ID of the dataset.
|
609 |
+
- in: body
|
610 |
+
name: body
|
611 |
+
description: Parsing parameters.
|
612 |
+
required: true
|
613 |
+
schema:
|
614 |
+
type: object
|
615 |
+
properties:
|
616 |
+
document_ids:
|
617 |
+
type: array
|
618 |
+
items:
|
619 |
+
type: string
|
620 |
+
description: List of document IDs to parse.
|
621 |
+
- in: header
|
622 |
+
name: Authorization
|
623 |
+
type: string
|
624 |
+
required: true
|
625 |
+
description: Bearer token for authentication.
|
626 |
+
responses:
|
627 |
+
200:
|
628 |
+
description: Parsing started successfully.
|
629 |
+
schema:
|
630 |
+
type: object
|
631 |
+
"""
|
632 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
633 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
634 |
req = request.json
|
635 |
if not req.get("document_ids"):
|
636 |
return get_error_data_result("`document_ids` is required")
|
637 |
for id in req["document_ids"]:
|
638 |
+
doc = DocumentService.query(id=id, kb_id=dataset_id)
|
639 |
if not doc:
|
640 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
641 |
+
if doc[0].progress != 0.0:
|
642 |
+
return get_error_data_result(
|
643 |
+
"Can't stop parsing document with progress at 0 or 100"
|
644 |
+
)
|
645 |
info = {"run": "1", "progress": 0}
|
646 |
info["progress_msg"] = ""
|
647 |
info["chunk_num"] = 0
|
648 |
info["token_num"] = 0
|
649 |
DocumentService.update_by_id(id, info)
|
650 |
ELASTICSEARCH.deleteByQuery(
|
651 |
+
Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
|
652 |
+
)
|
653 |
TaskService.filter_delete([Task.doc_id == id])
|
654 |
e, doc = DocumentService.get_by_id(id)
|
655 |
doc = doc.to_dict()
|
|
|
658 |
queue_tasks(doc, bucket, name)
|
659 |
return get_result()
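
Starting parsing for selected documents could look like the following sketch, under the same assumed prefix and token as the earlier examples.

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}

resp = requests.post(
    f"{BASE_URL}/datasets/<dataset_id>/chunks",
    headers=HEADERS,
    json={"document_ids": ["<document_id>"]},  # documents must belong to the dataset
)
print(resp.json())
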
|
660 |
|
661 |
+
|
662 |
+
@manager.route("/datasets/<dataset_id>/chunks", methods=["DELETE"])
|
663 |
@token_required
|
664 |
+
def stop_parsing(tenant_id, dataset_id):
|
665 |
+
"""
|
666 |
+
Stop parsing documents into chunks.
|
667 |
+
---
|
668 |
+
tags:
|
669 |
+
- Chunks
|
670 |
+
security:
|
671 |
+
- ApiKeyAuth: []
|
672 |
+
parameters:
|
673 |
+
- in: path
|
674 |
+
name: dataset_id
|
675 |
+
type: string
|
676 |
+
required: true
|
677 |
+
description: ID of the dataset.
|
678 |
+
- in: body
|
679 |
+
name: body
|
680 |
+
description: Stop parsing parameters.
|
681 |
+
required: true
|
682 |
+
schema:
|
683 |
+
type: object
|
684 |
+
properties:
|
685 |
+
document_ids:
|
686 |
+
type: array
|
687 |
+
items:
|
688 |
+
type: string
|
689 |
+
description: List of document IDs to stop parsing.
|
690 |
+
- in: header
|
691 |
+
name: Authorization
|
692 |
+
type: string
|
693 |
+
required: true
|
694 |
+
description: Bearer token for authentication.
|
695 |
+
responses:
|
696 |
+
200:
|
697 |
+
description: Parsing stopped successfully.
|
698 |
+
schema:
|
699 |
+
type: object
|
700 |
+
"""
|
701 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
702 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
703 |
req = request.json
|
|
|
708 |
if not doc:
|
709 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
710 |
if int(doc[0].progress) == 1 or int(doc[0].progress) == 0:
|
711 |
+
return get_error_data_result(
|
712 |
+
"Can't stop parsing document with progress at 0 or 1"
|
713 |
+
)
|
714 |
+
info = {"run": "2", "progress": 0, "chunk_num": 0}
|
715 |
DocumentService.update_by_id(id, info)
|
716 |
ELASTICSEARCH.deleteByQuery(
|
717 |
+
Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
|
718 |
+
)
|
719 |
return get_result()
|
720 |
|
721 |
|
722 |
+
@manager.route("/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["GET"])
|
723 |
@token_required
|
724 |
+
def list_chunks(tenant_id, dataset_id, document_id):
|
725 |
+
"""
|
726 |
+
List chunks of a document.
|
727 |
+
---
|
728 |
+
tags:
|
729 |
+
- Chunks
|
730 |
+
security:
|
731 |
+
- ApiKeyAuth: []
|
732 |
+
parameters:
|
733 |
+
- in: path
|
734 |
+
name: dataset_id
|
735 |
+
type: string
|
736 |
+
required: true
|
737 |
+
description: ID of the dataset.
|
738 |
+
- in: path
|
739 |
+
name: document_id
|
740 |
+
type: string
|
741 |
+
required: true
|
742 |
+
description: ID of the document.
|
743 |
+
- in: query
|
744 |
+
name: offset
|
745 |
+
type: integer
|
746 |
+
required: false
|
747 |
+
default: 1
|
748 |
+
description: Page number.
|
749 |
+
- in: query
|
750 |
+
name: limit
|
751 |
+
type: integer
|
752 |
+
required: false
|
753 |
+
default: 30
|
754 |
+
description: Number of items per page.
|
755 |
+
- in: header
|
756 |
+
name: Authorization
|
757 |
+
type: string
|
758 |
+
required: true
|
759 |
+
description: Bearer token for authentication.
|
760 |
+
responses:
|
761 |
+
200:
|
762 |
+
description: List of chunks.
|
763 |
+
schema:
|
764 |
+
type: object
|
765 |
+
properties:
|
766 |
+
total:
|
767 |
+
type: integer
|
768 |
+
description: Total number of chunks.
|
769 |
+
chunks:
|
770 |
+
type: array
|
771 |
+
items:
|
772 |
+
type: object
|
773 |
+
properties:
|
774 |
+
id:
|
775 |
+
type: string
|
776 |
+
description: Chunk ID.
|
777 |
+
content:
|
778 |
+
type: string
|
779 |
+
description: Chunk content.
|
780 |
+
document_id:
|
781 |
+
type: string
|
782 |
+
description: ID of the document.
|
783 |
+
important_keywords:
|
784 |
+
type: array
|
785 |
+
items:
|
786 |
+
type: string
|
787 |
+
description: Important keywords.
|
788 |
+
image_id:
|
789 |
+
type: string
|
790 |
+
description: Image ID associated with the chunk.
|
791 |
+
doc:
|
792 |
+
type: object
|
793 |
+
description: Document details.
|
794 |
+
"""
|
795 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
796 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
797 |
+
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
798 |
if not doc:
|
799 |
+
return get_error_data_result(
|
800 |
+
retmsg=f"You don't own the document {document_id}."
|
801 |
+
)
|
802 |
+
doc = doc[0]
|
803 |
req = request.args
|
804 |
doc_id = document_id
|
805 |
page = int(req.get("offset", 1))
|
806 |
size = int(req.get("limit", 30))
|
807 |
question = req.get("keywords", "")
|
808 |
query = {
|
809 |
+
"doc_ids": [doc_id],
|
810 |
+
"page": page,
|
811 |
+
"size": size,
|
812 |
+
"question": question,
|
813 |
+
"sort": True,
|
814 |
}
|
815 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
816 |
key_mapping = {
|
817 |
"chunk_num": "chunk_count",
|
818 |
"kb_id": "dataset_id",
|
819 |
"token_num": "token_count",
|
820 |
+
"parser_id": "chunk_method",
|
821 |
}
|
822 |
run_mapping = {
|
823 |
"0": "UNSTART",
|
824 |
"1": "RUNNING",
|
825 |
"2": "CANCEL",
|
826 |
"3": "DONE",
|
827 |
+
"4": "FAIL",
|
828 |
}
|
829 |
+
doc = doc.to_dict()
|
830 |
renamed_doc = {}
|
831 |
for key, value in doc.items():
|
832 |
new_key = key_mapping.get(key, key)
|
|
|
839 |
for id in sres.ids:
|
840 |
d = {
|
841 |
"chunk_id": id,
|
842 |
+
"content_with_weight": (
|
843 |
+
rmSpace(sres.highlight[id])
|
844 |
+
if question and id in sres.highlight
|
845 |
+
else sres.field[id].get("content_with_weight", "")
|
846 |
+
),
|
847 |
"doc_id": sres.field[id]["doc_id"],
|
848 |
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
849 |
"important_kwd": sres.field[id].get("important_kwd", []),
|
850 |
"img_id": sres.field[id].get("img_id", ""),
|
851 |
"available_int": sres.field[id].get("available_int", 1),
|
852 |
+
"positions": sres.field[id].get("position_int", "").split("\t"),
|
853 |
}
|
854 |
if len(d["positions"]) % 5 == 0:
|
855 |
poss = []
|
856 |
for i in range(0, len(d["positions"]), 5):
|
857 |
+
poss.append(
|
858 |
+
[
|
859 |
+
float(d["positions"][i]),
|
860 |
+
float(d["positions"][i + 1]),
|
861 |
+
float(d["positions"][i + 2]),
|
862 |
+
float(d["positions"][i + 3]),
|
863 |
+
float(d["positions"][i + 4]),
|
864 |
+
]
|
865 |
+
)
|
866 |
d["positions"] = poss
|
867 |
|
868 |
origin_chunks.append(d)
|
|
|
882 |
"doc_id": "document_id",
|
883 |
"important_kwd": "important_keywords",
|
884 |
"img_id": "image_id",
|
885 |
+
"available_int": "available",
|
886 |
}
|
887 |
renamed_chunk = {}
|
888 |
for key, value in chunk.items():
|
|
|
896 |
return get_result(data=res)
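
A possible way to page through a document's chunks with the documented query parameters (prefix, token and IDs are placeholders):

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}

resp = requests.get(
    f"{BASE_URL}/datasets/<dataset_id>/documents/<document_id>/chunks",
    headers=HEADERS,
    params={"offset": 1, "limit": 30, "keywords": "embedding"},
)
print(resp.json())
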
|
897 |
|
898 |
|
899 |
+
@manager.route(
|
900 |
+
"/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["POST"]
|
901 |
+
)
|
902 |
@token_required
|
903 |
+
def add_chunk(tenant_id, dataset_id, document_id):
|
904 |
+
"""
|
905 |
+
Add a chunk to a document.
|
906 |
+
---
|
907 |
+
tags:
|
908 |
+
- Chunks
|
909 |
+
security:
|
910 |
+
- ApiKeyAuth: []
|
911 |
+
parameters:
|
912 |
+
- in: path
|
913 |
+
name: dataset_id
|
914 |
+
type: string
|
915 |
+
required: true
|
916 |
+
description: ID of the dataset.
|
917 |
+
- in: path
|
918 |
+
name: document_id
|
919 |
+
type: string
|
920 |
+
required: true
|
921 |
+
description: ID of the document.
|
922 |
+
- in: body
|
923 |
+
name: body
|
924 |
+
description: Chunk data.
|
925 |
+
required: true
|
926 |
+
schema:
|
927 |
+
type: object
|
928 |
+
properties:
|
929 |
+
content:
|
930 |
+
type: string
|
931 |
+
required: true
|
932 |
+
description: Content of the chunk.
|
933 |
+
important_keywords:
|
934 |
+
type: array
|
935 |
+
items:
|
936 |
+
type: string
|
937 |
+
description: Important keywords.
|
938 |
+
- in: header
|
939 |
+
name: Authorization
|
940 |
+
type: string
|
941 |
+
required: true
|
942 |
+
description: Bearer token for authentication.
|
943 |
+
responses:
|
944 |
+
200:
|
945 |
+
description: Chunk added successfully.
|
946 |
+
schema:
|
947 |
+
type: object
|
948 |
+
properties:
|
949 |
+
chunk:
|
950 |
+
type: object
|
951 |
+
properties:
|
952 |
+
id:
|
953 |
+
type: string
|
954 |
+
description: Chunk ID.
|
955 |
+
content:
|
956 |
+
type: string
|
957 |
+
description: Chunk content.
|
958 |
+
document_id:
|
959 |
+
type: string
|
960 |
+
description: ID of the document.
|
961 |
+
important_keywords:
|
962 |
+
type: array
|
963 |
+
items:
|
964 |
+
type: string
|
965 |
+
description: Important keywords.
|
966 |
+
"""
|
967 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
968 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
969 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
970 |
if not doc:
|
971 |
+
return get_error_data_result(
|
972 |
+
retmsg=f"You don't own the document {document_id}."
|
973 |
+
)
|
974 |
doc = doc[0]
|
975 |
req = request.json
|
976 |
if not req.get("content"):
|
977 |
return get_error_data_result(retmsg="`content` is required")
|
978 |
if "important_keywords" in req:
|
979 |
if type(req["important_keywords"]) != list:
|
980 |
+
return get_error_data_result(
|
981 |
+
"`important_keywords` is required to be a list"
|
982 |
+
)
|
983 |
md5 = hashlib.md5()
|
984 |
md5.update((req["content"] + document_id).encode("utf-8"))
|
985 |
|
986 |
chunk_id = md5.hexdigest()
|
987 |
+
d = {
|
988 |
+
"id": chunk_id,
|
989 |
+
"content_ltks": rag_tokenizer.tokenize(req["content"]),
|
990 |
+
"content_with_weight": req["content"],
|
991 |
+
}
|
992 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
993 |
d["important_kwd"] = req.get("important_keywords", [])
|
994 |
+
d["important_tks"] = rag_tokenizer.tokenize(
|
995 |
+
" ".join(req.get("important_keywords", []))
|
996 |
+
)
|
997 |
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
|
998 |
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
|
999 |
d["kb_id"] = [doc.kb_id]
|
|
|
1001 |
d["doc_id"] = doc.id
|
1002 |
embd_id = DocumentService.get_embd_id(document_id)
|
1003 |
embd_mdl = TenantLLMService.model_instance(
|
1004 |
+
tenant_id, LLMType.EMBEDDING.value, embd_id
|
1005 |
+
)
|
1006 |
+
print(embd_mdl, flush=True)
|
1007 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
1008 |
v = 0.1 * v[0] + 0.9 * v[1]
|
1009 |
d["q_%d_vec" % len(v)] = v.tolist()
|
1010 |
ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
|
1011 |
|
1012 |
+
DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
|
|
|
1013 |
d["chunk_id"] = chunk_id
|
1014 |
+
d["kb_id"] = doc.kb_id
|
1015 |
# rename keys
|
1016 |
key_mapping = {
|
1017 |
"chunk_id": "id",
|
|
|
1021 |
"kb_id": "dataset_id",
|
1022 |
"create_timestamp_flt": "create_timestamp",
|
1023 |
"create_time": "create_time",
|
1024 |
+
"document_keyword": "document",
|
1025 |
}
|
1026 |
renamed_chunk = {}
|
1027 |
for key, value in d.items():
|
|
|
1032 |
# return get_result(data={"chunk_id": chunk_id})
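
Adding a chunk manually, per the request schema above, might be exercised like this sketch (prefix, token and IDs assumed):

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}

resp = requests.post(
    f"{BASE_URL}/datasets/<dataset_id>/documents/<document_id>/chunks",
    headers=HEADERS,
    json={"content": "RAGFlow chunks can be added manually.", "important_keywords": ["ragflow", "chunk"]},
)
print(resp.json())
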
|
1033 |
|
1034 |
|
1035 |
+
@manager.route(
|
1036 |
+
"datasets/<dataset_id>/documents/<document_id>/chunks", methods=["DELETE"]
|
1037 |
+
)
|
1038 |
@token_required
|
1039 |
+
def rm_chunk(tenant_id, dataset_id, document_id):
|
1040 |
+
"""
|
1041 |
+
Remove chunks from a document.
|
1042 |
+
---
|
1043 |
+
tags:
|
1044 |
+
- Chunks
|
1045 |
+
security:
|
1046 |
+
- ApiKeyAuth: []
|
1047 |
+
parameters:
|
1048 |
+
- in: path
|
1049 |
+
name: dataset_id
|
1050 |
+
type: string
|
1051 |
+
required: true
|
1052 |
+
description: ID of the dataset.
|
1053 |
+
- in: path
|
1054 |
+
name: document_id
|
1055 |
+
type: string
|
1056 |
+
required: true
|
1057 |
+
description: ID of the document.
|
1058 |
+
- in: body
|
1059 |
+
name: body
|
1060 |
+
description: Chunk removal parameters.
|
1061 |
+
required: true
|
1062 |
+
schema:
|
1063 |
+
type: object
|
1064 |
+
properties:
|
1065 |
+
chunk_ids:
|
1066 |
+
type: array
|
1067 |
+
items:
|
1068 |
+
type: string
|
1069 |
+
description: List of chunk IDs to remove.
|
1070 |
+
- in: header
|
1071 |
+
name: Authorization
|
1072 |
+
type: string
|
1073 |
+
required: true
|
1074 |
+
description: Bearer token for authentication.
|
1075 |
+
responses:
|
1076 |
+
200:
|
1077 |
+
description: Chunks removed successfully.
|
1078 |
+
schema:
|
1079 |
+
type: object
|
1080 |
+
"""
|
1081 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
1082 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
1083 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
1084 |
if not doc:
|
1085 |
+
return get_error_data_result(
|
1086 |
+
retmsg=f"You don't own the document {document_id}."
|
1087 |
+
)
|
1088 |
doc = doc[0]
|
1089 |
req = request.json
|
1090 |
+
if not req.get("chunk_ids"):
|
1091 |
+
return get_error_data_result("`chunk_ids` is required")
|
1092 |
+
query = {"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
1093 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
1094 |
if not req:
|
1095 |
+
chunk_ids = None
|
1096 |
else:
|
1097 |
+
chunk_ids = req.get("chunk_ids")
|
1098 |
if not chunk_ids:
|
1099 |
+
chunk_list = sres.ids
|
1100 |
else:
|
1101 |
+
chunk_list = chunk_ids
|
1102 |
for chunk_id in chunk_list:
|
1103 |
if chunk_id not in sres.ids:
|
1104 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
1105 |
if not ELASTICSEARCH.deleteByQuery(
|
1106 |
+
Q("ids", values=chunk_list), search.index_name(tenant_id)
|
1107 |
+
):
|
1108 |
return get_error_data_result(retmsg="Index updating failure")
|
1109 |
deleted_chunk_ids = chunk_list
|
1110 |
chunk_number = len(deleted_chunk_ids)
|
|
|
1112 |
return get_result()
|
1113 |
|
1114 |
|
1115 |
+
@manager.route(
|
1116 |
+
"/datasets/<dataset_id>/documents/<document_id>/chunks/<chunk_id>", methods=["PUT"]
|
1117 |
+
)
|
1118 |
@token_required
|
1119 |
+
def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
|
1120 |
+
"""
|
1121 |
+
Update a chunk within a document.
|
1122 |
+
---
|
1123 |
+
tags:
|
1124 |
+
- Chunks
|
1125 |
+
security:
|
1126 |
+
- ApiKeyAuth: []
|
1127 |
+
parameters:
|
1128 |
+
- in: path
|
1129 |
+
name: dataset_id
|
1130 |
+
type: string
|
1131 |
+
required: true
|
1132 |
+
description: ID of the dataset.
|
1133 |
+
- in: path
|
1134 |
+
name: document_id
|
1135 |
+
type: string
|
1136 |
+
required: true
|
1137 |
+
description: ID of the document.
|
1138 |
+
- in: path
|
1139 |
+
name: chunk_id
|
1140 |
+
type: string
|
1141 |
+
required: true
|
1142 |
+
description: ID of the chunk to update.
|
1143 |
+
- in: body
|
1144 |
+
name: body
|
1145 |
+
description: Chunk update parameters.
|
1146 |
+
required: true
|
1147 |
+
schema:
|
1148 |
+
type: object
|
1149 |
+
properties:
|
1150 |
+
content:
|
1151 |
+
type: string
|
1152 |
+
description: Updated content of the chunk.
|
1153 |
+
important_keywords:
|
1154 |
+
type: array
|
1155 |
+
items:
|
1156 |
+
type: string
|
1157 |
+
description: Updated important keywords.
|
1158 |
+
available:
|
1159 |
+
type: boolean
|
1160 |
+
description: Availability status of the chunk.
|
1161 |
+
- in: header
|
1162 |
+
name: Authorization
|
1163 |
+
type: string
|
1164 |
+
required: true
|
1165 |
+
description: Bearer token for authentication.
|
1166 |
+
responses:
|
1167 |
+
200:
|
1168 |
+
description: Chunk updated successfully.
|
1169 |
+
schema:
|
1170 |
+
type: object
|
1171 |
+
"""
|
1172 |
try:
|
1173 |
+
res = ELASTICSEARCH.get(chunk_id, search.index_name(tenant_id))
|
|
|
|
|
1174 |
except Exception as e:
|
1175 |
return get_error_data_result(f"Can't find this chunk {chunk_id}")
|
1176 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
1177 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
1178 |
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
1179 |
if not doc:
|
1180 |
+
return get_error_data_result(
|
1181 |
+
retmsg=f"You don't own the document {document_id}."
|
1182 |
+
)
|
1183 |
doc = doc[0]
|
1184 |
query = {
|
1185 |
+
"doc_ids": [document_id],
|
1186 |
+
"page": 1,
|
1187 |
+
"size": 1024,
|
1188 |
+
"question": "",
|
1189 |
+
"sort": True,
|
1190 |
}
|
1191 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
1192 |
if chunk_id not in sres.ids:
|
1193 |
return get_error_data_result(f"You don't own the chunk {chunk_id}")
|
1194 |
req = request.json
|
1195 |
+
content = res["_source"].get("content_with_weight")
|
1196 |
+
d = {"id": chunk_id, "content_with_weight": req.get("content", content)}
|
|
|
|
|
1197 |
d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
|
1198 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
1199 |
if "important_keywords" in req:
|
1200 |
+
if not isinstance(req["important_keywords"], list):
|
1201 |
return get_error_data_result("`important_keywords` should be a list")
|
1202 |
d["important_kwd"] = req.get("important_keywords")
|
1203 |
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
|
|
1205 |
d["available_int"] = int(req["available"])
|
1206 |
embd_id = DocumentService.get_embd_id(document_id)
|
1207 |
embd_mdl = TenantLLMService.model_instance(
|
1208 |
+
tenant_id, LLMType.EMBEDDING.value, embd_id
|
1209 |
+
)
|
1210 |
if doc.parser_id == ParserType.QA:
|
1211 |
+
arr = [t for t in re.split(r"[\n\t]", d["content_with_weight"]) if len(t) > 1]
|
|
|
|
|
|
|
1212 |
if len(arr) != 2:
|
1213 |
return get_error_data_result(
|
1214 |
+
retmsg="Q&A must be separated by TAB/ENTER key."
|
1215 |
+
)
|
1216 |
q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
|
1217 |
+
d = beAdoc(
|
1218 |
+
d, arr[0], arr[1], not any([rag_tokenizer.is_chinese(t) for t in q + a])
|
1219 |
+
)
|
1220 |
|
1221 |
v, c = embd_mdl.encode([doc.name, d["content_with_weight"]])
|
1222 |
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
|
|
|
1225 |
return get_result()
|
1226 |
|
1227 |
|
1228 |
+
@manager.route("/retrieval", methods=["POST"])
|
|
|
1229 |
@token_required
|
1230 |
def retrieval_test(tenant_id):
|
1231 |
+
"""
|
1232 |
+
Retrieve chunks based on a query.
|
1233 |
+
---
|
1234 |
+
tags:
|
1235 |
+
- Retrieval
|
1236 |
+
security:
|
1237 |
+
- ApiKeyAuth: []
|
1238 |
+
parameters:
|
1239 |
+
- in: body
|
1240 |
+
name: body
|
1241 |
+
description: Retrieval parameters.
|
1242 |
+
required: true
|
1243 |
+
schema:
|
1244 |
+
type: object
|
1245 |
+
properties:
|
1246 |
+
dataset_ids:
|
1247 |
+
type: array
|
1248 |
+
items:
|
1249 |
+
type: string
|
1250 |
+
required: true
|
1251 |
+
description: List of dataset IDs to search in.
|
1252 |
+
question:
|
1253 |
+
type: string
|
1254 |
+
required: true
|
1255 |
+
description: Query string.
|
1256 |
+
document_ids:
|
1257 |
+
type: array
|
1258 |
+
items:
|
1259 |
+
type: string
|
1260 |
+
description: List of document IDs to filter.
|
1261 |
+
similarity_threshold:
|
1262 |
+
type: number
|
1263 |
+
format: float
|
1264 |
+
description: Similarity threshold.
|
1265 |
+
vector_similarity_weight:
|
1266 |
+
type: number
|
1267 |
+
format: float
|
1268 |
+
description: Vector similarity weight.
|
1269 |
+
top_k:
|
1270 |
+
type: integer
|
1271 |
+
description: Maximum number of chunks to return.
|
1272 |
+
highlight:
|
1273 |
+
type: boolean
|
1274 |
+
description: Whether to highlight matched content.
|
1275 |
+
- in: header
|
1276 |
+
name: Authorization
|
1277 |
+
type: string
|
1278 |
+
required: true
|
1279 |
+
description: Bearer token for authentication.
|
1280 |
+
responses:
|
1281 |
+
200:
|
1282 |
+
description: Retrieval results.
|
1283 |
+
schema:
|
1284 |
+
type: object
|
1285 |
+
properties:
|
1286 |
+
chunks:
|
1287 |
+
type: array
|
1288 |
+
items:
|
1289 |
+
type: object
|
1290 |
+
properties:
|
1291 |
+
id:
|
1292 |
+
type: string
|
1293 |
+
description: Chunk ID.
|
1294 |
+
content:
|
1295 |
+
type: string
|
1296 |
+
description: Chunk content.
|
1297 |
+
document_id:
|
1298 |
+
type: string
|
1299 |
+
description: ID of the document.
|
1300 |
+
dataset_id:
|
1301 |
+
type: string
|
1302 |
+
description: ID of the dataset.
|
1303 |
+
similarity:
|
1304 |
+
type: number
|
1305 |
+
format: float
|
1306 |
+
description: Similarity score.
|
1307 |
+
"""
|
1308 |
req = request.json
|
1309 |
if not req.get("dataset_ids"):
|
1310 |
return get_error_data_result("`dataset_ids` is required.")
|
1311 |
kb_ids = req["dataset_ids"]
|
1312 |
+
if not isinstance(kb_ids, list):
|
1313 |
return get_error_data_result("`dataset_ids` should be a list")
|
1314 |
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
1315 |
for id in kb_ids:
|
1316 |
+
if not KnowledgebaseService.query(id=id, tenant_id=tenant_id):
|
1317 |
return get_error_data_result(f"You don't own the dataset {id}.")
|
1318 |
embd_nms = list(set([kb.embd_id for kb in kbs]))
|
1319 |
if len(embd_nms) != 1:
|
1320 |
return get_result(
|
1321 |
retmsg='Datasets use different embedding models."',
|
1322 |
+
retcode=RetCode.AUTHENTICATION_ERROR,
|
1323 |
+
)
|
1324 |
if "question" not in req:
|
1325 |
return get_error_data_result("`question` is required.")
|
1326 |
page = int(req.get("offset", 1))
|
1327 |
size = int(req.get("limit", 1024))
|
1328 |
question = req["question"]
|
1329 |
doc_ids = req.get("document_ids", [])
|
1330 |
+
if not isinstance(doc_ids, list):
|
1331 |
return get_error_data_result("`documents` should be a list")
|
1332 |
+
doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
|
1333 |
for doc_id in doc_ids:
|
1334 |
if doc_id not in doc_ids_list:
|
1335 |
+
return get_error_data_result(
|
1336 |
+
f"The datasets don't own the document {doc_id}"
|
1337 |
+
)
|
1338 |
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
1339 |
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
1340 |
top = int(req.get("top_k", 1024))
|
1341 |
+
if req.get("highlight") == "False" or req.get("highlight") == "false":
|
1342 |
highlight = False
|
1343 |
else:
|
1344 |
highlight = True
|
|
|
1347 |
if not e:
|
1348 |
return get_error_data_result(retmsg="Dataset not found!")
|
1349 |
embd_mdl = TenantLLMService.model_instance(
|
1350 |
+
kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id
|
1351 |
+
)
|
1352 |
|
1353 |
rerank_mdl = None
|
1354 |
if req.get("rerank_id"):
|
1355 |
rerank_mdl = TenantLLMService.model_instance(
|
1356 |
+
kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]
|
1357 |
+
)
|
1358 |
|
1359 |
if req.get("keyword", False):
|
1360 |
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
|
1361 |
question += keyword_extraction(chat_mdl, question)
|
1362 |
|
1363 |
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
|
1364 |
+
ranks = retr.retrieval(
|
1365 |
+
question,
|
1366 |
+
embd_mdl,
|
1367 |
+
kb.tenant_id,
|
1368 |
+
kb_ids,
|
1369 |
+
page,
|
1370 |
+
size,
|
1371 |
+
similarity_threshold,
|
1372 |
+
vector_similarity_weight,
|
1373 |
+
top,
|
1374 |
+
doc_ids,
|
1375 |
+
rerank_mdl=rerank_mdl,
|
1376 |
+
highlight=highlight,
|
1377 |
+
)
|
1378 |
for c in ranks["chunks"]:
|
1379 |
if "vector" in c:
|
1380 |
del c["vector"]
|
|
|
1387 |
"content_with_weight": "content",
|
1388 |
"doc_id": "document_id",
|
1389 |
"important_kwd": "important_keywords",
|
1390 |
+
"docnm_kwd": "document_keyword",
|
1391 |
}
|
1392 |
rename_chunk = {}
|
1393 |
for key, value in chunk.items():
|
|
|
1398 |
return get_result(data=ranks)
|
1399 |
except Exception as e:
|
1400 |
if str(e).find("not_found") > 0:
|
1401 |
+
return get_result(
|
1402 |
+
retmsg=f"No chunk found! Check the chunk status please!",
|
1403 |
+
retcode=RetCode.DATA_ERROR,
|
1404 |
+
)
|
1405 |
+
return server_error_response(e)
|
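
To round off the doc.py changes, a hedged example of the retrieval route with the documented body fields; the base URL, prefix, token variable and the response envelope keys are assumptions, not part of this commit.

import os
import requests

BASE_URL = "http://localhost:9380/api/v1"   # assumed prefix
HEADERS = {"Authorization": f"Bearer {os.environ['RAGFLOW_API_KEY']}"}

resp = requests.post(
    f"{BASE_URL}/retrieval",
    headers=HEADERS,
    json={
        "dataset_ids": ["<dataset_id>"],
        "question": "How is the embedding model configured?",
        "similarity_threshold": 0.2,
        "vector_similarity_weight": 0.3,
        "top_k": 1024,
        "highlight": True,
    },
)
for chunk in resp.json().get("data", {}).get("chunks", []):  # envelope keys assumed
    print(chunk.get("similarity"), chunk.get("content", "")[:80])
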
api/apps/system_app.py
CHANGED
@@ -24,8 +24,14 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
24 |
from api.db.services.user_service import UserTenantService
|
25 |
from api.settings import DATABASE_TYPE
|
26 |
from api.utils import current_timestamp, datetime_format
|
27 |
-
from api.utils.api_utils import
|
28 |
-
|
|
|
|
|
|
29 |
from api.versions import get_rag_version
|
30 |
from rag.utils.es_conn import ELASTICSEARCH
|
31 |
from rag.utils.storage_factory import STORAGE_IMPL, STORAGE_IMPL_TYPE
|
@@ -34,44 +40,121 @@ from timeit import default_timer as timer
|
|
34 |
from rag.utils.redis_conn import REDIS_CONN
|
35 |
|
36 |
|
37 |
-
@manager.route(
|
38 |
@login_required
|
39 |
def version():
|
|
|
|
|
|
|
40 |
return get_json_result(data=get_rag_version())
|
41 |
|
42 |
|
43 |
-
@manager.route(
|
44 |
@login_required
|
45 |
def status():
|
|
|
|
|
|
|
|
|
46 |
res = {}
|
47 |
st = timer()
|
48 |
try:
|
49 |
res["es"] = ELASTICSEARCH.health()
|
50 |
-
res["es"]["elapsed"] = "{:.1f}".format((timer() - st)*1000.)
|
51 |
except Exception as e:
|
52 |
-
res["es"] = {
|
|
|
|
53 |
|
54 |
st = timer()
|
55 |
try:
|
56 |
STORAGE_IMPL.health()
|
57 |
-
res["storage"] = {
|
|
|
|
58 |
except Exception as e:
|
59 |
-
res["storage"] = {
|
|
|
|
|
60 |
|
61 |
st = timer()
|
62 |
try:
|
63 |
KnowledgebaseService.get_by_id("x")
|
64 |
-
res["database"] = {
|
|
|
|
65 |
except Exception as e:
|
66 |
-
res["database"] = {
|
|
|
|
|
67 |
|
68 |
st = timer()
|
69 |
try:
|
70 |
if not REDIS_CONN.health():
|
71 |
raise Exception("Lost connection!")
|
72 |
-
res["redis"] = {
|
|
|
|
73 |
except Exception as e:
|
74 |
-
res["redis"] = {
|
|
|
|
75 |
|
76 |
try:
|
77 |
v = REDIS_CONN.get("TASKEXE")
|
@@ -84,10 +167,12 @@ def status():
|
|
84 |
if len(arr) == 1:
|
85 |
obj[id] = [0]
|
86 |
else:
|
87 |
-
obj[id] = [arr[i+1]-arr[i] for i in range(len(arr)-1)]
|
88 |
elapsed = max(obj[id])
|
89 |
-
if elapsed > 50:
|
90 |
-
|
|
|
|
|
91 |
res["task_executor"] = {"status": color, "elapsed": obj}
|
92 |
except Exception as e:
|
93 |
res["task_executor"] = {"status": "red", "error": str(e)}
|
@@ -95,21 +180,46 @@ def status():
|
|
95 |
return get_json_result(data=res)
|
96 |
|
97 |
|
98 |
-
@manager.route(
|
99 |
@login_required
|
100 |
def new_token():
|
|
|
|
|
|
|
|
101 |
try:
|
102 |
tenants = UserTenantService.query(user_id=current_user.id)
|
103 |
if not tenants:
|
104 |
return get_data_error_result(retmsg="Tenant not found!")
|
105 |
|
106 |
tenant_id = tenants[0].tenant_id
|
107 |
-
obj = {
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
|
114 |
if not APITokenService.save(**obj):
|
115 |
return get_data_error_result(retmsg="Fail to new a dialog!")
|
@@ -119,9 +229,37 @@ def new_token():
|
|
119 |
return server_error_response(e)
|
120 |
|
121 |
|
122 |
-
@manager.route(
|
123 |
@login_required
|
124 |
def token_list():
|
|
|
|
|
|
|
|
|
|
|
|
125 |
try:
|
126 |
tenants = UserTenantService.query(user_id=current_user.id)
|
127 |
if not tenants:
|
@@ -133,9 +271,33 @@ def token_list():
|
|
133 |
return server_error_response(e)
|
134 |
|
135 |
|
136 |
-
@manager.route(
|
137 |
@login_required
|
138 |
def rm(token):
|
|
|
|
|
|
|
|
|
139 |
APITokenService.filter_delete(
|
140 |
-
|
141 |
-
|
|
|
|
24 |
from api.db.services.user_service import UserTenantService
|
25 |
from api.settings import DATABASE_TYPE
|
26 |
from api.utils import current_timestamp, datetime_format
|
27 |
+
from api.utils.api_utils import (
|
28 |
+
get_json_result,
|
29 |
+
get_data_error_result,
|
30 |
+
server_error_response,
|
31 |
+
generate_confirmation_token,
|
32 |
+
request,
|
33 |
+
validate_request,
|
34 |
+
)
|
35 |
from api.versions import get_rag_version
|
36 |
from rag.utils.es_conn import ELASTICSEARCH
|
37 |
from rag.utils.storage_factory import STORAGE_IMPL, STORAGE_IMPL_TYPE
|
|
|
40 |
from rag.utils.redis_conn import REDIS_CONN
|
41 |
|
42 |
|
43 |
+
@manager.route("/version", methods=["GET"])
|
44 |
@login_required
|
45 |
def version():
|
46 |
+
"""
|
47 |
+
Get the current version of the application.
|
48 |
+
---
|
49 |
+
tags:
|
50 |
+
- System
|
51 |
+
security:
|
52 |
+
- ApiKeyAuth: []
|
53 |
+
responses:
|
54 |
+
200:
|
55 |
+
description: Version retrieved successfully.
|
56 |
+
schema:
|
57 |
+
type: object
|
58 |
+
properties:
|
59 |
+
version:
|
60 |
+
type: string
|
61 |
+
description: Version number.
|
62 |
+
"""
|
63 |
return get_json_result(data=get_rag_version())
|
64 |
|
65 |
|
66 |
+
@manager.route("/status", methods=["GET"])
|
67 |
@login_required
|
68 |
def status():
|
69 |
+
"""
|
70 |
+
Get the system status.
|
71 |
+
---
|
72 |
+
tags:
|
73 |
+
- System
|
74 |
+
security:
|
75 |
+
- ApiKeyAuth: []
|
76 |
+
responses:
|
77 |
+
200:
|
78 |
+
description: System is operational.
|
79 |
+
schema:
|
80 |
+
type: object
|
81 |
+
properties:
|
82 |
+
es:
|
83 |
+
type: object
|
84 |
+
description: Elasticsearch status.
|
85 |
+
storage:
|
86 |
+
type: object
|
87 |
+
description: Storage status.
|
88 |
+
database:
|
89 |
+
type: object
|
90 |
+
description: Database status.
|
91 |
+
503:
|
92 |
+
description: Service unavailable.
|
93 |
+
schema:
|
94 |
+
type: object
|
95 |
+
properties:
|
96 |
+
error:
|
97 |
+
type: string
|
98 |
+
description: Error message.
|
99 |
+
"""
|
100 |
res = {}
|
101 |
st = timer()
|
102 |
try:
|
103 |
res["es"] = ELASTICSEARCH.health()
|
104 |
+
res["es"]["elapsed"] = "{:.1f}".format((timer() - st) * 1000.0)
|
105 |
except Exception as e:
|
106 |
+
res["es"] = {
|
107 |
+
"status": "red",
|
108 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
109 |
+
"error": str(e),
|
110 |
+
}
|
111 |
|
112 |
st = timer()
|
113 |
try:
|
114 |
STORAGE_IMPL.health()
|
115 |
+
res["storage"] = {
|
116 |
+
"storage": STORAGE_IMPL_TYPE.lower(),
|
117 |
+
"status": "green",
|
118 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
119 |
+
}
|
120 |
except Exception as e:
|
121 |
+
res["storage"] = {
|
122 |
+
"storage": STORAGE_IMPL_TYPE.lower(),
|
123 |
+
"status": "red",
|
124 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
125 |
+
"error": str(e),
|
126 |
+
}
|
127 |
|
128 |
st = timer()
|
129 |
try:
|
130 |
KnowledgebaseService.get_by_id("x")
|
131 |
+
res["database"] = {
|
132 |
+
"database": DATABASE_TYPE.lower(),
|
133 |
+
"status": "green",
|
134 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
135 |
+
}
|
136 |
except Exception as e:
|
137 |
+
res["database"] = {
|
138 |
+
"database": DATABASE_TYPE.lower(),
|
139 |
+
"status": "red",
|
140 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
141 |
+
"error": str(e),
|
142 |
+
}
|
143 |
|
144 |
st = timer()
|
145 |
try:
|
146 |
if not REDIS_CONN.health():
|
147 |
raise Exception("Lost connection!")
|
148 |
+
res["redis"] = {
|
149 |
+
"status": "green",
|
150 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
151 |
+
}
|
152 |
except Exception as e:
|
153 |
+
res["redis"] = {
|
154 |
+
"status": "red",
|
155 |
+
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
|
156 |
+
"error": str(e),
|
157 |
+
}
|
158 |
|
159 |
try:
|
160 |
v = REDIS_CONN.get("TASKEXE")
|
|
|
167 |
if len(arr) == 1:
|
168 |
obj[id] = [0]
|
169 |
else:
|
170 |
+
obj[id] = [arr[i + 1] - arr[i] for i in range(len(arr) - 1)]
|
171 |
elapsed = max(obj[id])
|
172 |
+
if elapsed > 50:
|
173 |
+
color = "yellow"
|
174 |
+
if elapsed > 120:
|
175 |
+
color = "red"
|
176 |
res["task_executor"] = {"status": color, "elapsed": obj}
|
177 |
except Exception as e:
|
178 |
res["task_executor"] = {"status": "red", "error": str(e)}
|
|
|
180 |
return get_json_result(data=res)
|
181 |
|
182 |
|
183 |
+
@manager.route("/new_token", methods=["POST"])
|
184 |
@login_required
|
185 |
def new_token():
|
186 |
+
"""
|
187 |
+
Generate a new API token.
|
188 |
+
---
|
189 |
+
tags:
|
190 |
+
- API Tokens
|
191 |
+
security:
|
192 |
+
- ApiKeyAuth: []
|
193 |
+
parameters:
|
194 |
+
- in: query
|
195 |
+
name: name
|
196 |
+
type: string
|
197 |
+
required: false
|
198 |
+
description: Name of the token.
|
199 |
+
responses:
|
200 |
+
200:
|
201 |
+
description: Token generated successfully.
|
202 |
+
schema:
|
203 |
+
type: object
|
204 |
+
properties:
|
205 |
+
token:
|
206 |
+
type: string
|
207 |
+
description: The generated API token.
|
208 |
+
"""
|
209 |
try:
|
210 |
tenants = UserTenantService.query(user_id=current_user.id)
|
211 |
if not tenants:
|
212 |
return get_data_error_result(retmsg="Tenant not found!")
|
213 |
|
214 |
tenant_id = tenants[0].tenant_id
|
215 |
+
obj = {
|
216 |
+
"tenant_id": tenant_id,
|
217 |
+
"token": generate_confirmation_token(tenant_id),
|
218 |
+
"create_time": current_timestamp(),
|
219 |
+
"create_date": datetime_format(datetime.now()),
|
220 |
+
"update_time": None,
|
221 |
+
"update_date": None,
|
222 |
+
}
|
223 |
|
224 |
if not APITokenService.save(**obj):
|
225 |
return get_data_error_result(retmsg="Fail to new a dialog!")
|
|
|
229 |
return server_error_response(e)
|
230 |
|
231 |
|
232 |
+
@manager.route("/token_list", methods=["GET"])
|
233 |
@login_required
|
234 |
def token_list():
|
235 |
+
"""
|
236 |
+
List all API tokens for the current user.
|
237 |
+
---
|
238 |
+
tags:
|
239 |
+
- API Tokens
|
240 |
+
security:
|
241 |
+
- ApiKeyAuth: []
|
242 |
+
responses:
|
243 |
+
200:
|
244 |
+
description: List of API tokens.
|
245 |
+
schema:
|
246 |
+
type: object
|
247 |
+
properties:
|
248 |
+
tokens:
|
249 |
+
type: array
|
250 |
+
items:
|
251 |
+
type: object
|
252 |
+
properties:
|
253 |
+
token:
|
254 |
+
type: string
|
255 |
+
description: The API token.
|
256 |
+
name:
|
257 |
+
type: string
|
258 |
+
description: Name of the token.
|
259 |
+
create_time:
|
260 |
+
type: string
|
261 |
+
description: Token creation time.
|
262 |
+
"""
|
263 |
try:
|
264 |
tenants = UserTenantService.query(user_id=current_user.id)
|
265 |
if not tenants:
|
|
|
271 |
return server_error_response(e)
|
272 |
|
273 |
|
274 |
+
@manager.route("/token/<token>", methods=["DELETE"])
|
275 |
@login_required
|
276 |
def rm(token):
|
277 |
+
"""
|
278 |
+
Remove an API token.
|
279 |
+
---
|
280 |
+
tags:
|
281 |
+
- API Tokens
|
282 |
+
security:
|
283 |
+
- ApiKeyAuth: []
|
284 |
+
parameters:
|
285 |
+
- in: path
|
286 |
+
name: token
|
287 |
+
type: string
|
288 |
+
required: true
|
289 |
+
description: The API token to remove.
|
290 |
+
responses:
|
291 |
+
200:
|
292 |
+
description: Token removed successfully.
|
293 |
+
schema:
|
294 |
+
type: object
|
295 |
+
properties:
|
296 |
+
success:
|
297 |
+
type: boolean
|
298 |
+
description: Deletion status.
|
299 |
+
"""
|
300 |
APITokenService.filter_delete(
|
301 |
+
[APIToken.tenant_id == current_user.id, APIToken.token == token]
|
302 |
+
)
|
303 |
+
return get_json_result(data=True)
|
api/apps/user_app.py
CHANGED
@@ -23,65 +23,141 @@ from flask_login import login_required, current_user, login_user, logout_user
|
|
23 |
|
24 |
from api.db.db_models import TenantLLM
|
25 |
from api.db.services.llm_service import TenantLLMService, LLMService
|
26 |
-
from api.utils.api_utils import
|
27 |
-
|
|
|
|
|
|
28 |
from api.db import UserTenantRole, LLMType, FileType
|
29 |
-
from api.settings import
|
30 |
-
|
31 |
-
|
|
|
|
|
|
32 |
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
33 |
from api.db.services.file_service import FileService
|
34 |
from api.settings import stat_logger
|
35 |
from api.utils.api_utils import get_json_result, construct_response
|
36 |
|
37 |
|
38 |
-
@manager.route(
|
39 |
def login():
|
|
|
|
|
|
|
|
|
40 |
if not request.json:
|
41 |
-
return get_json_result(
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
email = request.json.get(
|
46 |
users = UserService.query(email=email)
|
47 |
if not users:
|
48 |
-
return get_json_result(
|
49 |
-
|
50 |
-
|
|
|
|
|
51 |
|
52 |
-
password = request.json.get(
|
53 |
try:
|
54 |
password = decrypt(password)
|
55 |
except BaseException:
|
56 |
-
return get_json_result(
|
57 |
-
|
58 |
-
|
59 |
|
60 |
user = UserService.query_user(email, password)
|
61 |
if user:
|
62 |
response_data = user.to_json()
|
63 |
user.access_token = get_uuid()
|
64 |
login_user(user)
|
65 |
-
user.update_time = current_timestamp(),
|
66 |
-
user.update_date = datetime_format(datetime.now()),
|
67 |
user.save()
|
68 |
msg = "Welcome back!"
|
69 |
return construct_response(data=response_data, auth=user.get_id(), retmsg=msg)
|
70 |
else:
|
71 |
-
return get_json_result(
|
72 |
-
|
73 |
-
|
|
|
|
|
74 |
|
75 |
|
76 |
-
@manager.route(
|
77 |
def github_callback():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
import requests
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
85 |
res = res.json()
|
86 |
if "error" in res:
|
87 |
return redirect("/?error=%s" % res["error_description"])
|
@@ -103,19 +179,22 @@ def github_callback():
|
|
103 |
except Exception as e:
|
104 |
stat_logger.exception(e)
|
105 |
avatar = ""
|
106 |
-
users = user_register(
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
115 |
if not users:
|
116 |
-
raise Exception(f
|
117 |
if len(users) > 1:
|
118 |
-
raise Exception(f
|
119 |
|
120 |
# Try to log in
|
121 |
user = users[0]
|
@@ -134,30 +213,56 @@ def github_callback():
|
|
134 |
return redirect("/?auth=%s" % user.get_id())
|
135 |
|
136 |
|
137 |
-
@manager.route(
|
138 |
def feishu_callback():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
import requests
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
146 |
app_access_token_res = app_access_token_res.json()
|
147 |
-
if app_access_token_res[
|
148 |
return redirect("/?error=%s" % app_access_token_res)
|
149 |
|
150 |
-
res = requests.post(
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
159 |
res = res.json()
|
160 |
-
if res[
|
161 |
return redirect("/?error=%s" % res["message"])
|
162 |
|
163 |
if "contact:user.email:readonly" not in res["data"]["scope"].split(" "):
|
@@ -176,19 +281,22 @@ def feishu_callback():
|
|
176 |
except Exception as e:
|
177 |
stat_logger.exception(e)
|
178 |
avatar = ""
|
179 |
-
users = user_register(
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
188 |
if not users:
|
189 |
-
raise Exception(f
|
190 |
if len(users) > 1:
|
191 |
-
raise Exception(f
|
192 |
|
193 |
# Try to log in
|
194 |
user = users[0]
|
@@ -209,11 +317,14 @@ def feishu_callback():
|
|
209 |
|
210 |
def user_info_from_feishu(access_token):
|
211 |
import requests
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
214 |
res = requests.get(
|
215 |
-
f"https://open.feishu.cn/open-apis/authen/v1/user_info",
|
216 |
-
|
217 |
user_info = res.json()["data"]
|
218 |
user_info["email"] = None if user_info.get("email") == "" else user_info["email"]
|
219 |
return user_info
|
@@ -221,24 +332,38 @@ def user_info_from_feishu(access_token):
|
|
221 |
|
222 |
def user_info_from_github(access_token):
|
223 |
import requests
|
224 |
-
|
225 |
-
|
226 |
res = requests.get(
|
227 |
-
f"https://api.github.com/user?access_token={access_token}",
|
228 |
-
|
229 |
user_info = res.json()
|
230 |
email_info = requests.get(
|
231 |
f"https://api.github.com/user/emails?access_token={access_token}",
|
232 |
-
headers=headers
|
|
|
233 |
user_info["email"] = next(
|
234 |
-
(email for email in email_info if email[
|
235 |
-
|
236 |
return user_info
|
237 |
|
238 |
|
239 |
-
@manager.route("/logout", methods=[
|
240 |
@login_required
|
241 |
def log_out():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
current_user.access_token = ""
|
243 |
current_user.save()
|
244 |
logout_user()
|
@@ -248,20 +373,62 @@ def log_out():
|
|
248 |
@manager.route("/setting", methods=["POST"])
|
249 |
@login_required
|
250 |
def setting_user():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
update_dict = {}
|
252 |
request_data = request.json
|
253 |
if request_data.get("password"):
|
254 |
new_password = request_data.get("new_password")
|
255 |
if not check_password_hash(
|
256 |
-
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
if new_password:
|
260 |
update_dict["password"] = generate_password_hash(decrypt(new_password))
|
261 |
|
262 |
for k in request_data.keys():
|
263 |
-
if k in [
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
continue
|
266 |
update_dict[k] = request_data[k]
|
267 |
|
@@ -270,12 +437,37 @@ def setting_user():
|
|
270 |
return get_json_result(data=True)
|
271 |
except Exception as e:
|
272 |
stat_logger.exception(e)
|
273 |
-
return get_json_result(
|
|
|
|
|
274 |
|
275 |
|
276 |
@manager.route("/info", methods=["GET"])
|
277 |
@login_required
|
278 |
def user_profile():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
return get_json_result(data=current_user.to_dict())
|
280 |
|
281 |
|
@@ -310,13 +502,13 @@ def user_register(user_id, user):
|
|
310 |
"asr_id": ASR_MDL,
|
311 |
"parser_ids": PARSERS,
|
312 |
"img2txt_id": IMAGE2TEXT_MDL,
|
313 |
-
"rerank_id": RERANK_MDL
|
314 |
}
|
315 |
usr_tenant = {
|
316 |
"tenant_id": user_id,
|
317 |
"user_id": user_id,
|
318 |
"invited_by": user_id,
|
319 |
-
"role": UserTenantRole.OWNER
|
320 |
}
|
321 |
file_id = get_uuid()
|
322 |
file = {
|
@@ -331,13 +523,16 @@ def user_register(user_id, user):
|
|
331 |
}
|
332 |
tenant_llm = []
|
333 |
for llm in LLMService.query(fid=LLM_FACTORY):
|
334 |
-
tenant_llm.append(
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
|
|
|
|
|
|
341 |
|
342 |
if not UserService.save(**user):
|
343 |
return
|
@@ -351,21 +546,52 @@ def user_register(user_id, user):
|
|
351 |
@manager.route("/register", methods=["POST"])
|
352 |
@validate_request("nickname", "email", "password")
|
353 |
def user_add():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
req = request.json
|
355 |
email_address = req["email"]
|
356 |
|
357 |
# Validate the email address
|
358 |
if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,5}$", email_address):
|
359 |
-
return get_json_result(
|
360 |
-
|
361 |
-
|
|
|
|
|
362 |
|
363 |
# Check if the email address is already used
|
364 |
if UserService.query(email=email_address):
|
365 |
return get_json_result(
|
366 |
data=False,
|
367 |
-
retmsg=f
|
368 |
-
retcode=RetCode.OPERATING_ERROR
|
|
|
369 |
|
370 |
# Construct user info data
|
371 |
nickname = req["nickname"]
|
@@ -383,25 +609,55 @@ def user_add():
|
|
383 |
try:
|
384 |
users = user_register(user_id, user_dict)
|
385 |
if not users:
|
386 |
-
raise Exception(f
|
387 |
if len(users) > 1:
|
388 |
-
raise Exception(f
|
389 |
user = users[0]
|
390 |
login_user(user)
|
391 |
-
return construct_response(
|
392 |
-
|
393 |
-
|
|
|
|
|
394 |
except Exception as e:
|
395 |
rollback_user_registration(user_id)
|
396 |
stat_logger.exception(e)
|
397 |
-
return get_json_result(
|
398 |
-
|
399 |
-
|
|
|
|
|
400 |
|
401 |
|
402 |
@manager.route("/tenant_info", methods=["GET"])
|
403 |
@login_required
|
404 |
def tenant_info():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
try:
|
406 |
tenants = TenantService.get_info_by(current_user.id)
|
407 |
if not tenants:
|
@@ -415,6 +671,42 @@ def tenant_info():
|
|
415 |
@login_required
|
416 |
@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
|
417 |
def set_tenant_info():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
req = request.json
|
419 |
try:
|
420 |
tid = req["tenant_id"]
|
|
|
23 |
|
24 |
from api.db.db_models import TenantLLM
|
25 |
from api.db.services.llm_service import TenantLLMService, LLMService
|
26 |
+
from api.utils.api_utils import (
|
27 |
+
server_error_response,
|
28 |
+
validate_request,
|
29 |
+
get_data_error_result,
|
30 |
+
)
|
31 |
+
from api.utils import (
|
32 |
+
get_uuid,
|
33 |
+
get_format_time,
|
34 |
+
decrypt,
|
35 |
+
download_img,
|
36 |
+
current_timestamp,
|
37 |
+
datetime_format,
|
38 |
+
)
|
39 |
from api.db import UserTenantRole, LLMType, FileType
|
40 |
+
from api.settings import (
|
41 |
+
RetCode,
|
42 |
+
GITHUB_OAUTH,
|
43 |
+
FEISHU_OAUTH,
|
44 |
+
CHAT_MDL,
|
45 |
+
EMBEDDING_MDL,
|
46 |
+
ASR_MDL,
|
47 |
+
IMAGE2TEXT_MDL,
|
48 |
+
PARSERS,
|
49 |
+
API_KEY,
|
50 |
+
LLM_FACTORY,
|
51 |
+
LLM_BASE_URL,
|
52 |
+
RERANK_MDL,
|
53 |
+
)
|
54 |
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
55 |
from api.db.services.file_service import FileService
|
56 |
from api.settings import stat_logger
|
57 |
from api.utils.api_utils import get_json_result, construct_response
|
58 |
|
59 |
|
60 |
+
@manager.route("/login", methods=["POST", "GET"])
|
61 |
def login():
|
62 |
+
"""
|
63 |
+
User login endpoint.
|
64 |
+
---
|
65 |
+
tags:
|
66 |
+
- User
|
67 |
+
parameters:
|
68 |
+
- in: body
|
69 |
+
name: body
|
70 |
+
description: Login credentials.
|
71 |
+
required: true
|
72 |
+
schema:
|
73 |
+
type: object
|
74 |
+
properties:
|
75 |
+
email:
|
76 |
+
type: string
|
77 |
+
description: User email.
|
78 |
+
password:
|
79 |
+
type: string
|
80 |
+
description: User password.
|
81 |
+
responses:
|
82 |
+
200:
|
83 |
+
description: Login successful.
|
84 |
+
schema:
|
85 |
+
type: object
|
86 |
+
401:
|
87 |
+
description: Authentication failed.
|
88 |
+
schema:
|
89 |
+
type: object
|
90 |
+
"""
|
91 |
if not request.json:
|
92 |
+
return get_json_result(
|
93 |
+
data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg="Unauthorized!"
|
94 |
+
)
|
95 |
|
96 |
+
email = request.json.get("email", "")
|
97 |
users = UserService.query(email=email)
|
98 |
if not users:
|
99 |
+
return get_json_result(
|
100 |
+
data=False,
|
101 |
+
retcode=RetCode.AUTHENTICATION_ERROR,
|
102 |
+
retmsg=f"Email: {email} is not registered!",
|
103 |
+
)
|
104 |
|
105 |
+
password = request.json.get("password")
|
106 |
try:
|
107 |
password = decrypt(password)
|
108 |
except BaseException:
|
109 |
+
return get_json_result(
|
110 |
+
data=False, retcode=RetCode.SERVER_ERROR, retmsg="Fail to crypt password"
|
111 |
+
)
|
112 |
|
113 |
user = UserService.query_user(email, password)
|
114 |
if user:
|
115 |
response_data = user.to_json()
|
116 |
user.access_token = get_uuid()
|
117 |
login_user(user)
|
118 |
+
user.update_time = (current_timestamp(),)
|
119 |
+
user.update_date = (datetime_format(datetime.now()),)
|
120 |
user.save()
|
121 |
msg = "Welcome back!"
|
122 |
return construct_response(data=response_data, auth=user.get_id(), retmsg=msg)
|
123 |
else:
|
124 |
+
return get_json_result(
|
125 |
+
data=False,
|
126 |
+
retcode=RetCode.AUTHENTICATION_ERROR,
|
127 |
+
retmsg="Email and password do not match!",
|
128 |
+
)
|
129 |
|
130 |
|
131 |
+
@manager.route("/github_callback", methods=["GET"])
|
132 |
def github_callback():
|
133 |
+
"""
|
134 |
+
GitHub OAuth callback endpoint.
|
135 |
+
---
|
136 |
+
tags:
|
137 |
+
- OAuth
|
138 |
+
parameters:
|
139 |
+
- in: query
|
140 |
+
name: code
|
141 |
+
type: string
|
142 |
+
required: true
|
143 |
+
description: Authorization code from GitHub.
|
144 |
+
responses:
|
145 |
+
200:
|
146 |
+
description: Authentication successful.
|
147 |
+
schema:
|
148 |
+
type: object
|
149 |
+
"""
|
150 |
import requests
|
151 |
+
|
152 |
+
res = requests.post(
|
153 |
+
GITHUB_OAUTH.get("url"),
|
154 |
+
data={
|
155 |
+
"client_id": GITHUB_OAUTH.get("client_id"),
|
156 |
+
"client_secret": GITHUB_OAUTH.get("secret_key"),
|
157 |
+
"code": request.args.get("code"),
|
158 |
+
},
|
159 |
+
headers={"Accept": "application/json"},
|
160 |
+
)
|
161 |
res = res.json()
|
162 |
if "error" in res:
|
163 |
return redirect("/?error=%s" % res["error_description"])
|
|
|
179 |
except Exception as e:
|
180 |
stat_logger.exception(e)
|
181 |
avatar = ""
|
182 |
+
users = user_register(
|
183 |
+
user_id,
|
184 |
+
{
|
185 |
+
"access_token": session["access_token"],
|
186 |
+
"email": email_address,
|
187 |
+
"avatar": avatar,
|
188 |
+
"nickname": user_info["login"],
|
189 |
+
"login_channel": "github",
|
190 |
+
"last_login_time": get_format_time(),
|
191 |
+
"is_superuser": False,
|
192 |
+
},
|
193 |
+
)
|
194 |
if not users:
|
195 |
+
raise Exception(f"Fail to register {email_address}.")
|
196 |
if len(users) > 1:
|
197 |
+
raise Exception(f"Same email: {email_address} exists!")
|
198 |
|
199 |
# Try to log in
|
200 |
user = users[0]
|
|
|
213 |
return redirect("/?auth=%s" % user.get_id())
|
214 |
|
215 |
|
216 |
+
@manager.route("/feishu_callback", methods=["GET"])
|
217 |
def feishu_callback():
|
218 |
+
"""
|
219 |
+
Feishu OAuth callback endpoint.
|
220 |
+
---
|
221 |
+
tags:
|
222 |
+
- OAuth
|
223 |
+
parameters:
|
224 |
+
- in: query
|
225 |
+
name: code
|
226 |
+
type: string
|
227 |
+
required: true
|
228 |
+
description: Authorization code from Feishu.
|
229 |
+
responses:
|
230 |
+
200:
|
231 |
+
description: Authentication successful.
|
232 |
+
schema:
|
233 |
+
type: object
|
234 |
+
"""
|
235 |
import requests
|
236 |
+
|
237 |
+
app_access_token_res = requests.post(
|
238 |
+
FEISHU_OAUTH.get("app_access_token_url"),
|
239 |
+
data=json.dumps(
|
240 |
+
{
|
241 |
+
"app_id": FEISHU_OAUTH.get("app_id"),
|
242 |
+
"app_secret": FEISHU_OAUTH.get("app_secret"),
|
243 |
+
}
|
244 |
+
),
|
245 |
+
headers={"Content-Type": "application/json; charset=utf-8"},
|
246 |
+
)
|
247 |
app_access_token_res = app_access_token_res.json()
|
248 |
+
if app_access_token_res["code"] != 0:
|
249 |
return redirect("/?error=%s" % app_access_token_res)
|
250 |
|
251 |
+
res = requests.post(
|
252 |
+
FEISHU_OAUTH.get("user_access_token_url"),
|
253 |
+
data=json.dumps(
|
254 |
+
{
|
255 |
+
"grant_type": FEISHU_OAUTH.get("grant_type"),
|
256 |
+
"code": request.args.get("code"),
|
257 |
+
}
|
258 |
+
),
|
259 |
+
headers={
|
260 |
+
"Content-Type": "application/json; charset=utf-8",
|
261 |
+
"Authorization": f"Bearer {app_access_token_res['app_access_token']}",
|
262 |
+
},
|
263 |
+
)
|
264 |
res = res.json()
|
265 |
+
if res["code"] != 0:
|
266 |
return redirect("/?error=%s" % res["message"])
|
267 |
|
268 |
if "contact:user.email:readonly" not in res["data"]["scope"].split(" "):
|
|
|
281 |
except Exception as e:
|
282 |
stat_logger.exception(e)
|
283 |
avatar = ""
|
284 |
+
users = user_register(
|
285 |
+
user_id,
|
286 |
+
{
|
287 |
+
"access_token": session["access_token"],
|
288 |
+
"email": email_address,
|
289 |
+
"avatar": avatar,
|
290 |
+
"nickname": user_info["en_name"],
|
291 |
+
"login_channel": "feishu",
|
292 |
+
"last_login_time": get_format_time(),
|
293 |
+
"is_superuser": False,
|
294 |
+
},
|
295 |
+
)
|
296 |
if not users:
|
297 |
+
raise Exception(f"Fail to register {email_address}.")
|
298 |
if len(users) > 1:
|
299 |
+
raise Exception(f"Same email: {email_address} exists!")
|
300 |
|
301 |
# Try to log in
|
302 |
user = users[0]
|
|
|
317 |
|
318 |
def user_info_from_feishu(access_token):
|
319 |
import requests
|
320 |
+
|
321 |
+
headers = {
|
322 |
+
"Content-Type": "application/json; charset=utf-8",
|
323 |
+
"Authorization": f"Bearer {access_token}",
|
324 |
+
}
|
325 |
res = requests.get(
|
326 |
+
f"https://open.feishu.cn/open-apis/authen/v1/user_info", headers=headers
|
327 |
+
)
|
328 |
user_info = res.json()["data"]
|
329 |
user_info["email"] = None if user_info.get("email") == "" else user_info["email"]
|
330 |
return user_info
|
|
|
332 |
|
333 |
def user_info_from_github(access_token):
|
334 |
import requests
|
335 |
+
|
336 |
+
headers = {"Accept": "application/json", "Authorization": f"token {access_token}"}
|
337 |
res = requests.get(
|
338 |
+
f"https://api.github.com/user?access_token={access_token}", headers=headers
|
339 |
+
)
|
340 |
user_info = res.json()
|
341 |
email_info = requests.get(
|
342 |
f"https://api.github.com/user/emails?access_token={access_token}",
|
343 |
+
headers=headers,
|
344 |
+
).json()
|
345 |
user_info["email"] = next(
|
346 |
+
(email for email in email_info if email["primary"] == True), None
|
347 |
+
)["email"]
|
348 |
return user_info
|
349 |
|
350 |
|
351 |
+
@manager.route("/logout", methods=["GET"])
|
352 |
@login_required
|
353 |
def log_out():
|
354 |
+
"""
|
355 |
+
User logout endpoint.
|
356 |
+
---
|
357 |
+
tags:
|
358 |
+
- User
|
359 |
+
security:
|
360 |
+
- ApiKeyAuth: []
|
361 |
+
responses:
|
362 |
+
200:
|
363 |
+
description: Logout successful.
|
364 |
+
schema:
|
365 |
+
type: object
|
366 |
+
"""
|
367 |
current_user.access_token = ""
|
368 |
current_user.save()
|
369 |
logout_user()
|
|
|
373 |
@manager.route("/setting", methods=["POST"])
|
374 |
@login_required
|
375 |
def setting_user():
|
376 |
+
"""
|
377 |
+
Update user settings.
|
378 |
+
---
|
379 |
+
tags:
|
380 |
+
- User
|
381 |
+
security:
|
382 |
+
- ApiKeyAuth: []
|
383 |
+
parameters:
|
384 |
+
- in: body
|
385 |
+
name: body
|
386 |
+
description: User settings to update.
|
387 |
+
required: true
|
388 |
+
schema:
|
389 |
+
type: object
|
390 |
+
properties:
|
391 |
+
nickname:
|
392 |
+
type: string
|
393 |
+
description: New nickname.
|
394 |
+
email:
|
395 |
+
type: string
|
396 |
+
description: New email.
|
397 |
+
responses:
|
398 |
+
200:
|
399 |
+
description: Settings updated successfully.
|
400 |
+
schema:
|
401 |
+
type: object
|
402 |
+
"""
|
403 |
update_dict = {}
|
404 |
request_data = request.json
|
405 |
if request_data.get("password"):
|
406 |
new_password = request_data.get("new_password")
|
407 |
if not check_password_hash(
|
408 |
+
current_user.password, decrypt(request_data["password"])
|
409 |
+
):
|
410 |
+
return get_json_result(
|
411 |
+
data=False,
|
412 |
+
retcode=RetCode.AUTHENTICATION_ERROR,
|
413 |
+
retmsg="Password error!",
|
414 |
+
)
|
415 |
|
416 |
if new_password:
|
417 |
update_dict["password"] = generate_password_hash(decrypt(new_password))
|
418 |
|
419 |
for k in request_data.keys():
|
420 |
+
if k in [
|
421 |
+
"password",
|
422 |
+
"new_password",
|
423 |
+
"email",
|
424 |
+
"status",
|
425 |
+
"is_superuser",
|
426 |
+
"login_channel",
|
427 |
+
"is_anonymous",
|
428 |
+
"is_active",
|
429 |
+
"is_authenticated",
|
430 |
+
"last_login_time",
|
431 |
+
]:
|
432 |
continue
|
433 |
update_dict[k] = request_data[k]
|
434 |
|
|
|
437 |
return get_json_result(data=True)
|
438 |
except Exception as e:
|
439 |
stat_logger.exception(e)
|
440 |
+
return get_json_result(
|
441 |
+
data=False, retmsg="Update failure!", retcode=RetCode.EXCEPTION_ERROR
|
442 |
+
)
|
443 |
|
444 |
|
445 |
@manager.route("/info", methods=["GET"])
|
446 |
@login_required
|
447 |
def user_profile():
|
448 |
+
"""
|
449 |
+
Get user profile information.
|
450 |
+
---
|
451 |
+
tags:
|
452 |
+
- User
|
453 |
+
security:
|
454 |
+
- ApiKeyAuth: []
|
455 |
+
responses:
|
456 |
+
200:
|
457 |
+
description: User profile retrieved successfully.
|
458 |
+
schema:
|
459 |
+
type: object
|
460 |
+
properties:
|
461 |
+
id:
|
462 |
+
type: string
|
463 |
+
description: User ID.
|
464 |
+
nickname:
|
465 |
+
type: string
|
466 |
+
description: User nickname.
|
467 |
+
email:
|
468 |
+
type: string
|
469 |
+
description: User email.
|
470 |
+
"""
|
471 |
return get_json_result(data=current_user.to_dict())
|
472 |
|
473 |
|
|
|
502 |
"asr_id": ASR_MDL,
|
503 |
"parser_ids": PARSERS,
|
504 |
"img2txt_id": IMAGE2TEXT_MDL,
|
505 |
+
"rerank_id": RERANK_MDL,
|
506 |
}
|
507 |
usr_tenant = {
|
508 |
"tenant_id": user_id,
|
509 |
"user_id": user_id,
|
510 |
"invited_by": user_id,
|
511 |
+
"role": UserTenantRole.OWNER,
|
512 |
}
|
513 |
file_id = get_uuid()
|
514 |
file = {
|
|
|
523 |
}
|
524 |
tenant_llm = []
|
525 |
for llm in LLMService.query(fid=LLM_FACTORY):
|
526 |
+
tenant_llm.append(
|
527 |
+
{
|
528 |
+
"tenant_id": user_id,
|
529 |
+
"llm_factory": LLM_FACTORY,
|
530 |
+
"llm_name": llm.llm_name,
|
531 |
+
"model_type": llm.model_type,
|
532 |
+
"api_key": API_KEY,
|
533 |
+
"api_base": LLM_BASE_URL,
|
534 |
+
}
|
535 |
+
)
|
536 |
|
537 |
if not UserService.save(**user):
|
538 |
return
|
|
|
546 |
@manager.route("/register", methods=["POST"])
|
547 |
@validate_request("nickname", "email", "password")
|
548 |
def user_add():
|
549 |
+
"""
|
550 |
+
Register a new user.
|
551 |
+
---
|
552 |
+
tags:
|
553 |
+
- User
|
554 |
+
parameters:
|
555 |
+
- in: body
|
556 |
+
name: body
|
557 |
+
description: Registration details.
|
558 |
+
required: true
|
559 |
+
schema:
|
560 |
+
type: object
|
561 |
+
properties:
|
562 |
+
nickname:
|
563 |
+
type: string
|
564 |
+
description: User nickname.
|
565 |
+
email:
|
566 |
+
type: string
|
567 |
+
description: User email.
|
568 |
+
password:
|
569 |
+
type: string
|
570 |
+
description: User password.
|
571 |
+
responses:
|
572 |
+
200:
|
573 |
+
description: Registration successful.
|
574 |
+
schema:
|
575 |
+
type: object
|
576 |
+
"""
|
577 |
req = request.json
|
578 |
email_address = req["email"]
|
579 |
|
580 |
# Validate the email address
|
581 |
if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,5}$", email_address):
|
582 |
+
return get_json_result(
|
583 |
+
data=False,
|
584 |
+
retmsg=f"Invalid email address: {email_address}!",
|
585 |
+
retcode=RetCode.OPERATING_ERROR,
|
586 |
+
)
|
587 |
|
588 |
# Check if the email address is already used
|
589 |
if UserService.query(email=email_address):
|
590 |
return get_json_result(
|
591 |
data=False,
|
592 |
+
retmsg=f"Email: {email_address} has already registered!",
|
593 |
+
retcode=RetCode.OPERATING_ERROR,
|
594 |
+
)
|
595 |
|
596 |
# Construct user info data
|
597 |
nickname = req["nickname"]
|
|
|
609 |
try:
|
610 |
users = user_register(user_id, user_dict)
|
611 |
if not users:
|
612 |
+
raise Exception(f"Fail to register {email_address}.")
|
613 |
if len(users) > 1:
|
614 |
+
raise Exception(f"Same email: {email_address} exists!")
|
615 |
user = users[0]
|
616 |
login_user(user)
|
617 |
+
return construct_response(
|
618 |
+
data=user.to_json(),
|
619 |
+
auth=user.get_id(),
|
620 |
+
retmsg=f"{nickname}, welcome aboard!",
|
621 |
+
)
|
622 |
except Exception as e:
|
623 |
rollback_user_registration(user_id)
|
624 |
stat_logger.exception(e)
|
625 |
+
return get_json_result(
|
626 |
+
data=False,
|
627 |
+
retmsg=f"User registration failure, error: {str(e)}",
|
628 |
+
retcode=RetCode.EXCEPTION_ERROR,
|
629 |
+
)
|
630 |
|
631 |
|
632 |
@manager.route("/tenant_info", methods=["GET"])
|
633 |
@login_required
|
634 |
def tenant_info():
|
635 |
+
"""
|
636 |
+
Get tenant information.
|
637 |
+
---
|
638 |
+
tags:
|
639 |
+
- Tenant
|
640 |
+
security:
|
641 |
+
- ApiKeyAuth: []
|
642 |
+
responses:
|
643 |
+
200:
|
644 |
+
description: Tenant information retrieved successfully.
|
645 |
+
schema:
|
646 |
+
type: object
|
647 |
+
properties:
|
648 |
+
tenant_id:
|
649 |
+
type: string
|
650 |
+
description: Tenant ID.
|
651 |
+
name:
|
652 |
+
type: string
|
653 |
+
description: Tenant name.
|
654 |
+
llm_id:
|
655 |
+
type: string
|
656 |
+
description: LLM ID.
|
657 |
+
embd_id:
|
658 |
+
type: string
|
659 |
+
description: Embedding model ID.
|
660 |
+
"""
|
661 |
try:
|
662 |
tenants = TenantService.get_info_by(current_user.id)
|
663 |
if not tenants:
|
|
|
671 |
@login_required
|
672 |
@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
|
673 |
def set_tenant_info():
|
674 |
+
"""
|
675 |
+
Update tenant information.
|
676 |
+
---
|
677 |
+
tags:
|
678 |
+
- Tenant
|
679 |
+
security:
|
680 |
+
- ApiKeyAuth: []
|
681 |
+
parameters:
|
682 |
+
- in: body
|
683 |
+
name: body
|
684 |
+
description: Tenant information to update.
|
685 |
+
required: true
|
686 |
+
schema:
|
687 |
+
type: object
|
688 |
+
properties:
|
689 |
+
tenant_id:
|
690 |
+
type: string
|
691 |
+
description: Tenant ID.
|
692 |
+
llm_id:
|
693 |
+
type: string
|
694 |
+
description: LLM ID.
|
695 |
+
embd_id:
|
696 |
+
type: string
|
697 |
+
description: Embedding model ID.
|
698 |
+
asr_id:
|
699 |
+
type: string
|
700 |
+
description: ASR model ID.
|
701 |
+
img2txt_id:
|
702 |
+
type: string
|
703 |
+
description: Image to Text model ID.
|
704 |
+
responses:
|
705 |
+
200:
|
706 |
+
description: Tenant information updated successfully.
|
707 |
+
schema:
|
708 |
+
type: object
|
709 |
+
"""
|
710 |
req = request.json
|
711 |
try:
|
712 |
tid = req["tenant_id"]
|
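The docstrings added above all follow the same flasgger convention: the text before the `---` separator is the endpoint summary, and everything after it is OpenAPI YAML that flasgger collects into the generated specification. A standalone sketch of that mechanism (not RAGFlow code, just the minimal pattern) is shown below.

# Minimal flasgger sketch: the docstring-after-"---" convention used throughout user_app.py.
from flask import Flask, jsonify
from flasgger import Swagger

app = Flask(__name__)
Swagger(app)  # serves the interactive UI at /apidocs by default

@app.route("/ping", methods=["GET"])
def ping():
    """
    Health-check endpoint.
    ---
    tags:
      - Demo
    responses:
      200:
        description: Service is alive.
        schema:
          type: object
    """
    return jsonify({"data": True})

if __name__ == "__main__":
    app.run(port=5000)  # visit http://127.0.0.1:5000/apidocs to see the rendered spec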
api/ragflow_server.py
CHANGED
@@ -27,7 +27,11 @@ from api.apps import app
 from api.db.runtime_config import RuntimeConfig
 from api.db.services.document_service import DocumentService
 from api.settings import (
+    HOST,
+    HTTP_PORT,
+    access_logger,
+    database_logger,
+    stat_logger,
 )
 from api import utils
@@ -45,27 +49,33 @@ def update_progress():
         stat_logger.error("update_progress exception:" + str(e))


+if __name__ == "__main__":
+    print(
+        r"""
  ____ ___ ______ ______ __
 / __ \ / | / ____// ____// /____ _ __
 / /_/ // /| | / / __ / /_ / // __ \| | /| / /
 / _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
 /_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/

+    """,
+        flush=True,
     )
+    stat_logger.info(f"project base: {utils.file_utils.get_project_base_directory()}")

     # init db
     init_web_db()
     init_web_data()
     # init runtime config
     import argparse
+
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--version", default=False, help="rag flow version", action="store_true"
+    )
+    parser.add_argument(
+        "--debug", default=False, help="debug mode", action="store_true"
+    )
     args = parser.parse_args()
     if args.version:
         print(get_versions())
@@ -78,7 +88,7 @@ if __name__ == '__main__':
     RuntimeConfig.init_env()
     RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)

+    peewee_logger = logging.getLogger("peewee")
     peewee_logger.propagate = False
     # rag_arch.common.log.ROpenHandler
     peewee_logger.addHandler(database_logger.handlers[0])
@@ -93,7 +103,14 @@ if __name__ == '__main__':
         werkzeug_logger = logging.getLogger("werkzeug")
         for h in access_logger.handlers:
             werkzeug_logger.addHandler(h)
+        run_simple(
+            hostname=HOST,
+            port=HTTP_PORT,
+            application=app,
+            threaded=True,
+            use_reloader=RuntimeConfig.DEBUG,
+            use_debugger=RuntimeConfig.DEBUG,
+        )
     except Exception:
         traceback.print_exc()
+        os.kill(os.getpid(), signal.SIGKILL)
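Once run_simple is serving the app, the generated documentation can be smoke-tested over HTTP. The `/apidocs/` UI route and the `/apispec_1.json` spec route are flasgger defaults and are assumed here; if the app passes a custom Swagger config in api/apps/__init__.py, the spec path may differ, and the host/port below are placeholders for your service configuration.

# Quick check that the Swagger UI and the raw OpenAPI spec are being served.
import requests

HOST, HTTP_PORT = "127.0.0.1", 9380  # assumed values, not taken from this PR
ui = requests.get(f"http://{HOST}:{HTTP_PORT}/apidocs/")
spec = requests.get(f"http://{HOST}:{HTTP_PORT}/apispec_1.json")
print(ui.status_code)                             # 200 if the Swagger UI is up
print(sorted(spec.json().get("paths", {}))[:5])   # first few documented routes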
poetry.lock
CHANGED
@@ -435,6 +435,17 @@ files = [
     {file = "Aspose.Slides-24.10.0-py3-none-win_amd64.whl", hash = "sha256:8980015fbc32c1e70e80444c70a642597511300ead6b352183bf74ba3da67f2d"},
 ]

+[[package]]
+name = "async-timeout"
+version = "4.0.3"
+description = "Timeout context manager for asyncio programs"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
+    {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
+]
+
 [[package]]
 name = "attrs"
 version = "24.2.0"
@@ -1912,7 +1923,10 @@ files = [
 huggingface-hub = ">=0.20,<1.0"
 loguru = ">=0.7.2,<0.8.0"
 mmh3 = ">=4.0,<5.0"
+numpy = [
+    {version = ">=1.21,<2", markers = "python_version < \"3.12\""},
+    {version = ">=1.26,<2", markers = "python_version >= \"3.12\""},
+]
 onnx = ">=1.15.0,<2.0.0"
 onnxruntime = ">=1.17.0,<2.0.0"
 pillow = ">=10.3.0,<11.0.0"
@@ -2037,6 +2051,24 @@ sentence_transformers = "*"
 torch = ">=1.6.0"
 transformers = ">=4.33.0"

+[[package]]
+name = "flasgger"
+version = "0.9.7.1"
+description = "Extract swagger specs from your flask project"
+optional = false
+python-versions = "*"
+files = [
+    {file = "flasgger-0.9.7.1.tar.gz", hash = "sha256:ca098e10bfbb12f047acc6299cc70a33851943a746e550d86e65e60d4df245fb"},
+]
+
+[package.dependencies]
+Flask = ">=0.10"
+jsonschema = ">=3.0.1"
+mistune = "*"
+packaging = "*"
+PyYAML = ">=3.0"
+six = ">=1.10.0"
+
 [[package]]
 name = "flask"
 version = "3.0.3"
@@ -4381,6 +4413,17 @@ httpx = ">=0.25,<1"
 orjson = ">=3.9.10,<3.11"
 pydantic = ">=2.5.2,<3"

+[[package]]
+name = "mistune"
+version = "3.0.2"
+description = "A sane and fast Markdown parser with useful plugins and renderers"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"},
+    {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"},
+]
+
 [[package]]
 name = "mkl"
 version = "2021.4.0"
@@ -5149,7 +5192,10 @@ files = [
 ]

 [package.dependencies]
+numpy = [
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+]

 [[package]]
 name = "opencv-python-headless"
@@ -5168,7 +5214,10 @@ files = [
 ]

 [package.dependencies]
+numpy = [
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+]

 [[package]]
 name = "openpyxl"
@@ -5350,7 +5399,10 @@ files = [
 ]

 [package.dependencies]
+numpy = [
+    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
 tzdata = ">=2022.7"
@@ -7009,6 +7061,24 @@ lxml = "*"
 [package.extras]
 test = ["timeout-decorator"]

+[[package]]
+name = "redis"
+version = "5.0.3"
+description = "Python client for Redis database and key-value store"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "redis-5.0.3-py3-none-any.whl", hash = "sha256:5da9b8fe9e1254293756c16c008e8620b3d15fcc6dde6babde9541850e72a32d"},
+    {file = "redis-5.0.3.tar.gz", hash = "sha256:4973bae7444c0fbed64a06b87446f79361cb7e4ec1538c022d696ed7a5015580"},
+]
+
+[package.dependencies]
+async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""}
+
+[package.extras]
+hiredis = ["hiredis (>=1.0.0)"]
+ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"]
+
 [[package]]
 name = "referencing"
 version = "0.35.1"
@@ -8468,6 +8538,7 @@
 nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 sympy = "*"
+triton = {version = "2.3.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
 typing-extensions = ">=4.8.0"

 [package.extras]
@@ -8611,6 +8682,29 @@ files = [
 trio = ">=0.11"
 wsproto = ">=0.14"

+[[package]]
+name = "triton"
+version = "2.3.0"
+description = "A language and compiler for custom Deep Learning operations"
+optional = false
+python-versions = "*"
+files = [
+    {file = "triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8"},
+    {file = "triton-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd"},
+    {file = "triton-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0"},
+    {file = "triton-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c"},
+    {file = "triton-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440"},
+    {file = "triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65"},
+]
+
+[package.dependencies]
+filelock = "*"
+
+[package.extras]
+build = ["cmake (>=3.20)", "lit"]
+tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
+tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
+
 [[package]]
 name = "typer"
 version = "0.12.5"
@@ -9446,5 +9540,5 @@ files = [

 [metadata]
 lock-version = "2.0"
+python-versions = ">=3.11,<3.13"
+content-hash = "74a9b4afef47cc36d638b43fd918ece27d65259af1ca9e5b17f6b239774e8bf9"
pyproject.toml
CHANGED
@@ -8,7 +8,7 @@ readme = "README.md"
 package-mode = false

 [tool.poetry.dependencies]
+python = ">=3.11,<3.13"
 datrie = "0.8.2"
 akshare = "^1.14.81"
 azure-storage-blob = "12.22.0"
@@ -114,6 +114,7 @@ graspologic = "^3.4.1"
 pymysql = "^1.1.1"
 mini-racer = "^0.12.4"
 pyicu = "^2.13.1"
+flasgger = "^0.9.7.1"


 [tool.poetry.group.full]
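With the new flasgger pin in place, a quick sanity check that the dependency resolved in the poetry environment can be done from Python; this uses only the standard library and assumes nothing about the project beyond the lock file above.

# Verify the installed flasgger version matches the lock file.
from importlib.metadata import version

print(version("flasgger"))  # expected to print 0.9.7.1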