Add upload file by knowledge base name API. (#539)
Browse files
### What problem does this PR solve?
Add upload file by knowledge base name API.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
---------
Co-authored-by: chrysanthemum-boy <fannc@qq.com>
- api/apps/api_app.py +81 -1
- api/db/services/knowledgebase_service.py +17 -2
- docs/conversation_api.md +57 -1
api/apps/api_app.py
CHANGED
@@ -13,18 +13,28 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
|
|
16 |
from datetime import datetime, timedelta
|
17 |
from flask import request
|
18 |
from flask_login import login_required, current_user
|
|
|
|
|
19 |
from api.db.db_models import APIToken, API4Conversation
|
|
|
20 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
21 |
from api.db.services.dialog_service import DialogService, chat
|
|
|
|
|
22 |
from api.db.services.user_service import UserTenantService
|
23 |
from api.settings import RetCode
|
24 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
25 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
26 |
from itsdangerous import URLSafeTimedSerializer
|
27 |
|
|
|
|
|
|
|
28 |
|
29 |
def generate_confirmation_token(tenent_id):
|
30 |
serializer = URLSafeTimedSerializer(tenent_id)
|
@@ -191,4 +201,74 @@ def get(conversation_id):
|
|
191 |
|
192 |
return get_json_result(data=conv.to_dict())
|
193 |
except Exception as e:
|
194 |
-
return server_error_response(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import os
|
17 |
+
import re
|
18 |
from datetime import datetime, timedelta
|
19 |
from flask import request
|
20 |
from flask_login import login_required, current_user
|
21 |
+
|
22 |
+
from api.db import FileType, ParserType
|
23 |
from api.db.db_models import APIToken, API4Conversation
|
24 |
+
from api.db.services import duplicate_name
|
25 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
26 |
from api.db.services.dialog_service import DialogService, chat
|
27 |
+
from api.db.services.document_service import DocumentService
|
28 |
+
from api.db.services.knowledgebase_service import KnowledgebaseService
|
29 |
from api.db.services.user_service import UserTenantService
|
30 |
from api.settings import RetCode
|
31 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
32 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
33 |
from itsdangerous import URLSafeTimedSerializer
|
34 |
|
35 |
+
from api.utils.file_utils import filename_type, thumbnail
|
36 |
+
from rag.utils import MINIO
|
37 |
+
|
38 |
|
39 |
def generate_confirmation_token(tenent_id):
|
40 |
serializer = URLSafeTimedSerializer(tenent_id)
|
|
|
201 |
|
202 |
return get_json_result(data=conv.to_dict())
|
203 |
except Exception as e:
|
204 |
+
return server_error_response(e)
|
205 |
+
|
206 |
+
|
207 |
+
@manager.route('/document/upload', methods=['POST'])
@validate_request("kb_name")
def upload():
    """Upload one file into the knowledge base named by the ``kb_name`` form field.

    The caller is identified by the API token carried in the ``Authorization``
    header (second whitespace-separated part, e.g. ``Bearer <token>``). The
    knowledge base is looked up by name within the tenant that owns the token.

    Request (multipart form):
        kb_name: name of the target knowledge base.
        file: the file to upload.

    Returns:
        A JSON result. On success ``data`` is the inserted document record
        (``doc.to_json()``). On failure it is an authentication error
        (missing/unknown token), an argument error (no file part / empty
        filename), a data error (unknown knowledge base, file quota exceeded,
        unsupported file type), or a server error response wrapping any
        exception raised while looking up the knowledge base or storing the
        document.
    """
    # A missing header or one without a token part must yield the same
    # authentication error as an unknown token, not an unhandled exception.
    auth_parts = request.headers.get('Authorization', '').split()
    if len(auth_parts) < 2:
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    objs = APIToken.query(token=auth_parts[1])
    if not objs:
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    # Tolerate kb_name being absent from the form payload: an empty name simply
    # fails the lookup below instead of raising on None.strip().
    kb_name = (request.form.get("kb_name") or "").strip()
    tenant_id = objs[0].tenant_id

    try:
        found, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
        if not found:
            return get_data_error_result(
                retmsg="Can't find this knowledgebase!")
        kb_id = kb.id
    except Exception as e:
        return server_error_response(e)

    if 'file' not in request.files:
        return get_json_result(
            data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

    file = request.files['file']
    if file.filename == '':
        return get_json_result(
            data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

    try:
        # Per-tenant document quota; overridable via MAX_FILE_NUM_PER_USER.
        max_files = int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192))
        if DocumentService.get_doc_count(kb.tenant_id) >= max_files:
            return get_data_error_result(
                retmsg="Exceed the maximum file number of a free user!")

        # Resolve a document name via duplicate_name against existing
        # documents of this knowledge base.
        filename = duplicate_name(
            DocumentService.query,
            name=file.filename,
            kb_id=kb_id)
        filetype = filename_type(filename)
        if not filetype:
            return get_data_error_result(
                retmsg="This type of file has not been supported yet!")

        # Pick a storage key that is not yet taken for this knowledge base.
        location = filename
        while MINIO.obj_exist(kb_id, location):
            location += "_"

        blob = file.read()
        MINIO.put(kb_id, location, blob)

        doc = {
            "id": get_uuid(),
            "kb_id": kb.id,
            "parser_id": kb.parser_id,
            "parser_config": kb.parser_config,
            "created_by": kb.tenant_id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
            "thumbnail": thumbnail(filename, blob)
        }
        # Override the knowledge base's default parser for file kinds that
        # need a dedicated one. The presentation check runs last and wins.
        if doc["type"] == FileType.VISUAL:
            doc["parser_id"] = ParserType.PICTURE.value
        if re.search(r"\.(ppt|pptx|pages)$", filename):
            doc["parser_id"] = ParserType.PRESENTATION.value

        doc = DocumentService.insert(doc)
        return get_json_result(data=doc.to_json())
    except Exception as e:
        return server_error_response(e)
|
api/db/services/knowledgebase_service.py
CHANGED
@@ -27,7 +27,8 @@ class KnowledgebaseService(CommonService):
|
|
27 |
page_number, items_per_page, orderby, desc):
|
28 |
kbs = cls.model.select().where(
|
29 |
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
30 |
-
|
|
|
31 |
& (cls.model.status == StatusEnum.VALID.value)
|
32 |
)
|
33 |
if desc:
|
@@ -56,7 +57,8 @@ class KnowledgebaseService(CommonService):
|
|
56 |
cls.model.chunk_num,
|
57 |
cls.model.parser_id,
|
58 |
cls.model.parser_config]
|
59 |
-
kbs = cls.model.select(*fields).join(Tenant, on=(
|
|
|
60 |
(cls.model.id == kb_id),
|
61 |
(cls.model.status == StatusEnum.VALID.value)
|
62 |
)
|
@@ -86,6 +88,7 @@ class KnowledgebaseService(CommonService):
|
|
86 |
old[k] = list(set(old[k] + v))
|
87 |
else:
|
88 |
old[k] = v
|
|
|
89 |
dfs_update(m.parser_config, config)
|
90 |
cls.update_by_id(id, {"parser_config": m.parser_config})
|
91 |
|
@@ -97,3 +100,15 @@ class KnowledgebaseService(CommonService):
|
|
97 |
if k.parser_config and "field_map" in k.parser_config:
|
98 |
conf.update(k.parser_config["field_map"])
|
99 |
return conf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
page_number, items_per_page, orderby, desc):
|
28 |
kbs = cls.model.select().where(
|
29 |
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
|
30 |
+
TenantPermission.TEAM.value)) | (
|
31 |
+
cls.model.tenant_id == user_id))
|
32 |
& (cls.model.status == StatusEnum.VALID.value)
|
33 |
)
|
34 |
if desc:
|
|
|
57 |
cls.model.chunk_num,
|
58 |
cls.model.parser_id,
|
59 |
cls.model.parser_config]
|
60 |
+
kbs = cls.model.select(*fields).join(Tenant, on=(
|
61 |
+
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
|
62 |
(cls.model.id == kb_id),
|
63 |
(cls.model.status == StatusEnum.VALID.value)
|
64 |
)
|
|
|
88 |
old[k] = list(set(old[k] + v))
|
89 |
else:
|
90 |
old[k] = v
|
91 |
+
|
92 |
dfs_update(m.parser_config, config)
|
93 |
cls.update_by_id(id, {"parser_config": m.parser_config})
|
94 |
|
|
|
100 |
if k.parser_config and "field_map" in k.parser_config:
|
101 |
conf.update(k.parser_config["field_map"])
|
102 |
return conf
|
103 |
+
|
104 |
+
@classmethod
@DB.connection_context()
def get_by_name(cls, kb_name, tenant_id):
    """Look up a valid knowledge base by name within one tenant.

    Args:
        kb_name: name to match against the model's ``name`` field.
        tenant_id: tenant the knowledge base must belong to.

    Returns:
        ``(True, record)`` with the first matching record, or
        ``(False, None)`` when nothing matches.
    """
    model = cls.model
    has_name = model.name == kb_name
    owned_by_tenant = model.tenant_id == tenant_id
    is_valid = model.status == StatusEnum.VALID.value

    matches = model.select().where(has_name & owned_by_tenant & is_valid)
    if not matches:
        return False, None
    return True, matches[0]
|
docs/conversation_api.md
CHANGED
@@ -303,5 +303,61 @@ This will be called to get the answer to users' questions.
|
|
303 |
## Get document content or image
|
304 |
|
305 |
This is usually used when display content of citation.
|
306 |
-
### Path: /document/get/\<id\>
|
307 |
### Method: GET
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
## Get document content or image
|
304 |
|
305 |
This is usually used when display content of citation.
|
306 |
+
### Path: /api/document/get/\<id\>
|
307 |
### Method: GET
|
308 |
+
|
309 |
+
## Upload file
|
310 |
+
|
311 |
+
This is used to upload a file to the knowledge base specified by name.
|
312 |
+
### Path: /api/document/upload
|
313 |
+
### Method: POST
|
314 |
+
|
315 |
+
### Parameter:
|
316 |
+
|
317 |
+
| name | type | optional | description |
|
318 |
+
|---------|--------|----------|----------------------------------------|
|
319 |
+
| file | file | No | Upload file. |
|
320 |
+
| kb_name | string | No | Choose the upload knowledge base name. |
|
321 |
+
|
322 |
+
### Response
|
323 |
+
```json
|
324 |
+
{
|
325 |
+
"data": {
|
326 |
+
"chunk_num": 0,
|
327 |
+
"create_date": "Thu, 25 Apr 2024 14:30:06 GMT",
|
328 |
+
"create_time": 1714026606921,
|
329 |
+
"created_by": "553ec818fd5711ee8ea63043d7ed348e",
|
330 |
+
"id": "41e9324602cd11ef9f5f3043d7ed348e",
|
331 |
+
"kb_id": "06802686c0a311ee85d6246e9694c130",
|
332 |
+
"location": "readme.txt",
|
333 |
+
"name": "readme.txt",
|
334 |
+
"parser_config": {
|
335 |
+
"field_map": {
|
336 |
+
},
|
337 |
+
"pages": [
|
338 |
+
[
|
339 |
+
0,
|
340 |
+
1000000
|
341 |
+
]
|
342 |
+
]
|
343 |
+
},
|
344 |
+
"parser_id": "general",
|
345 |
+
"process_begin_at": null,
|
346 |
+
"process_duation": 0.0,
|
347 |
+
"progress": 0.0,
|
348 |
+
"progress_msg": "",
|
349 |
+
"run": "0",
|
350 |
+
"size": 929,
|
351 |
+
"source_type": "local",
|
352 |
+
"status": "1",
|
353 |
+
"thumbnail": null,
|
354 |
+
"token_num": 0,
|
355 |
+
"type": "doc",
|
356 |
+
"update_date": "Thu, 25 Apr 2024 14:30:06 GMT",
|
357 |
+
"update_time": 1714026606921
|
358 |
+
},
|
359 |
+
"retcode": 0,
|
360 |
+
"retmsg": "success"
|
361 |
+
}
|
362 |
+
|
363 |
+
```
|