liuhua liuhua commited on
Commit
2e482fd
·
1 Parent(s): db736e5

Refactor Dataset API (#2783)

Browse files

### What problem does this PR solve?

Refactor Dataset API

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>

api/apps/__init__.py CHANGED
@@ -83,7 +83,7 @@ def register_page(page_path):
83
  sys.modules[module_name] = page
84
  spec.loader.exec_module(page)
85
  page_name = getattr(page, 'page_name', page_name)
86
- url_prefix = f'/api/{API_VERSION}/{page_name}' if "/sdk/" in path else f'/{API_VERSION}/{page_name}'
87
 
88
  app.register_blueprint(page.manager, url_prefix=url_prefix)
89
  return url_prefix
 
83
  sys.modules[module_name] = page
84
  spec.loader.exec_module(page)
85
  page_name = getattr(page, 'page_name', page_name)
86
+ url_prefix = f'/api/{API_VERSION}' if "/sdk/" in path else f'/{API_VERSION}/{page_name}'
87
 
88
  app.register_blueprint(page.manager, url_prefix=url_prefix)
89
  return url_prefix
api/apps/sdk/dataset.py CHANGED
@@ -25,143 +25,146 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
28
- from api.utils.api_utils import get_json_result, token_required, get_data_error_result
29
 
30
-
31
- @manager.route('/save', methods=['POST'])
32
  @token_required
33
- def save(tenant_id):
34
  req = request.json
35
  e, t = TenantService.get_by_id(tenant_id)
36
- if "id" not in req:
37
- if "tenant_id" in req or "embedding_model" in req:
38
- return get_data_error_result(
39
- retmsg="Tenant_id or embedding_model must not be provided")
40
- if "name" not in req:
41
- return get_data_error_result(
42
- retmsg="Name is not empty!")
43
- req['id'] = get_uuid()
44
- req["name"] = req["name"].strip()
45
- if req["name"] == "":
46
- return get_data_error_result(
47
- retmsg="Name is not empty string!")
48
- if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
49
- return get_data_error_result(
50
- retmsg="Duplicated knowledgebase name in creating dataset.")
51
- req["tenant_id"] = req['created_by'] = tenant_id
52
- req['embedding_model'] = t.embd_id
53
- key_mapping = {
54
- "chunk_num": "chunk_count",
55
- "doc_num": "document_count",
56
- "parser_id": "parse_method",
57
- "embd_id": "embedding_model"
58
- }
59
- mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
60
- req.update(mapped_keys)
61
- if not KnowledgebaseService.save(**req):
62
- return get_data_error_result(retmsg="Create dataset error.(Database error)")
63
- renamed_data = {}
64
- e, k = KnowledgebaseService.get_by_id(req["id"])
65
- for key, value in k.to_dict().items():
66
- new_key = key_mapping.get(key, key)
67
- renamed_data[new_key] = value
68
- return get_json_result(data=renamed_data)
69
- else:
70
- invalid_keys = {"embd_id", "chunk_num", "doc_num", "parser_id"}
71
- if any(key in req for key in invalid_keys):
72
- return get_data_error_result(retmsg="The input parameters are invalid.")
73
-
74
- if "tenant_id" in req:
75
- if req["tenant_id"] != tenant_id:
76
- return get_data_error_result(
77
- retmsg="Can't change tenant_id.")
78
-
79
- if "embedding_model" in req:
80
- if req["embedding_model"] != t.embd_id:
81
- return get_data_error_result(
82
- retmsg="Can't change embedding_model.")
83
- req.pop("embedding_model")
84
-
85
- if not KnowledgebaseService.query(
86
- created_by=tenant_id, id=req["id"]):
87
- return get_json_result(
88
- data=False, retmsg='You do not own the dataset.',
89
- retcode=RetCode.OPERATING_ERROR)
90
-
91
- if not req["id"]:
92
- return get_data_error_result(
93
- retmsg="id can not be empty.")
94
- e, kb = KnowledgebaseService.get_by_id(req["id"])
95
-
96
- if "chunk_count" in req:
97
- if req["chunk_count"] != kb.chunk_num:
98
- return get_data_error_result(
99
- retmsg="Can't change chunk_count.")
100
- req.pop("chunk_count")
101
-
102
- if "document_count" in req:
103
- if req['document_count'] != kb.doc_num:
104
- return get_data_error_result(
105
- retmsg="Can't change document_count.")
106
- req.pop("document_count")
107
-
108
- if "parse_method" in req:
109
- if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
110
- return get_data_error_result(
111
- retmsg="If chunk count is not 0, parse method is not changable.")
112
- req['parser_id'] = req.pop('parse_method')
113
- if "name" in req:
114
- req["name"] = req["name"].strip()
115
- if req["name"].lower() != kb.name.lower() \
116
- and len(KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id,
117
- status=StatusEnum.VALID.value)) > 0:
118
- return get_data_error_result(
119
- retmsg="Duplicated knowledgebase name in updating dataset.")
120
-
121
- del req["id"]
122
- if not KnowledgebaseService.update_by_id(kb.id, req):
123
- return get_data_error_result(retmsg="Update dataset error.(Database error)")
124
- return get_json_result(data=True)
125
-
126
 
127
- @manager.route('/delete', methods=['DELETE'])
128
  @token_required
129
  def delete(tenant_id):
130
- req = request.args
131
- if "id" not in req:
132
- return get_data_error_result(
133
- retmsg="id is required")
134
- kbs = KnowledgebaseService.query(
135
- created_by=tenant_id, id=req["id"])
136
- if not kbs:
137
- return get_json_result(
138
- data=False, retmsg='You do not own the dataset',
139
- retcode=RetCode.OPERATING_ERROR)
140
-
141
- for doc in DocumentService.query(kb_id=req["id"]):
142
- if not DocumentService.remove_document(doc, kbs[0].tenant_id):
143
- return get_data_error_result(
144
- retmsg="Remove document error.(Database error)")
145
- f2d = File2DocumentService.get_by_document_id(doc.id)
146
- FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
147
- File2DocumentService.delete_by_document_id(doc.id)
148
-
149
- if not KnowledgebaseService.delete_by_id(req["id"]):
150
- return get_data_error_result(
151
- retmsg="Delete dataset error.(Database serror)")
152
- return get_json_result(data=True)
153
-
154
-
155
- @manager.route('/list', methods=['GET'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  @token_required
157
- def list_datasets(tenant_id):
 
 
 
 
 
158
  page_number = int(request.args.get("page", 1))
159
  items_per_page = int(request.args.get("page_size", 1024))
160
  orderby = request.args.get("orderby", "create_time")
161
  desc = bool(request.args.get("desc", True))
162
  tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
163
- kbs = KnowledgebaseService.get_by_tenant_ids(
164
- [m["tenant_id"] for m in tenants], tenant_id, page_number, items_per_page, orderby, desc)
165
  renamed_list = []
166
  for kb in kbs:
167
  key_mapping = {
@@ -175,50 +178,4 @@ def list_datasets(tenant_id):
175
  new_key = key_mapping.get(key, key)
176
  renamed_data[new_key] = value
177
  renamed_list.append(renamed_data)
178
- return get_json_result(data=renamed_list)
179
-
180
-
181
- @manager.route('/detail', methods=['GET'])
182
- @token_required
183
- def detail(tenant_id):
184
- req = request.args
185
- key_mapping = {
186
- "chunk_num": "chunk_count",
187
- "doc_num": "document_count",
188
- "parser_id": "parse_method",
189
- "embd_id": "embedding_model"
190
- }
191
- renamed_data = {}
192
- if "id" in req:
193
- id = req["id"]
194
- kb = KnowledgebaseService.query(created_by=tenant_id, id=req["id"])
195
- if not kb:
196
- return get_json_result(
197
- data=False, retmsg='You do not own the dataset.',
198
- retcode=RetCode.OPERATING_ERROR)
199
- if "name" in req:
200
- name = req["name"]
201
- if kb[0].name != name:
202
- return get_json_result(
203
- data=False, retmsg='You do not own the dataset.',
204
- retcode=RetCode.OPERATING_ERROR)
205
- e, k = KnowledgebaseService.get_by_id(id)
206
- for key, value in k.to_dict().items():
207
- new_key = key_mapping.get(key, key)
208
- renamed_data[new_key] = value
209
- return get_json_result(data=renamed_data)
210
- else:
211
- if "name" in req:
212
- name = req["name"]
213
- e, k = KnowledgebaseService.get_by_name(kb_name=name, tenant_id=tenant_id)
214
- if not e:
215
- return get_json_result(
216
- data=False, retmsg='You do not own the dataset.',
217
- retcode=RetCode.OPERATING_ERROR)
218
- for key, value in k.to_dict().items():
219
- new_key = key_mapping.get(key, key)
220
- renamed_data[new_key] = value
221
- return get_json_result(data=renamed_data)
222
- else:
223
- return get_data_error_result(
224
- retmsg="At least one of `id` or `name` must be provided.")
 
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
28
+ from api.utils.api_utils import get_result, token_required,get_error_data_result
29
 
30
+ @manager.route('/dataset', methods=['POST'])
 
31
  @token_required
32
+ def create(tenant_id):
33
  req = request.json
34
  e, t = TenantService.get_by_id(tenant_id)
35
+ if "tenant_id" in req or "embedding_model" in req:
36
+ return get_error_data_result(
37
+ retmsg="Tenant_id or embedding_model must not be provided")
38
+ chunk_count=req.get("chunk_count")
39
+ document_count=req.get("document_count")
40
+ if chunk_count or document_count:
41
+ return get_error_data_result(retmsg="chunk_count or document_count must be 0 or not be provided")
42
+ if "name" not in req:
43
+ return get_error_data_result(
44
+ retmsg="Name is not empty!")
45
+ req['id'] = get_uuid()
46
+ req["name"] = req["name"].strip()
47
+ if req["name"] == "":
48
+ return get_error_data_result(
49
+ retmsg="Name is not empty string!")
50
+ if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
51
+ return get_error_data_result(
52
+ retmsg="Duplicated knowledgebase name in creating dataset.")
53
+ req["tenant_id"] = req['created_by'] = tenant_id
54
+ req['embedding_model'] = t.embd_id
55
+ key_mapping = {
56
+ "chunk_num": "chunk_count",
57
+ "doc_num": "document_count",
58
+ "parser_id": "parse_method",
59
+ "embd_id": "embedding_model"
60
+ }
61
+ mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
62
+ req.update(mapped_keys)
63
+ if not KnowledgebaseService.save(**req):
64
+ return get_error_data_result(retmsg="Create dataset error.(Database error)")
65
+ renamed_data = {}
66
+ e, k = KnowledgebaseService.get_by_id(req["id"])
67
+ for key, value in k.to_dict().items():
68
+ new_key = key_mapping.get(key, key)
69
+ renamed_data[new_key] = value
70
+ return get_result(data=renamed_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ @manager.route('/dataset', methods=['DELETE'])
73
  @token_required
74
  def delete(tenant_id):
75
+ req = request.json
76
+ names=req.get("names")
77
+ ids = req.get("ids")
78
+ if not ids and not names:
79
+ return get_error_data_result(
80
+ retmsg="ids or names is required")
81
+ id_list=[]
82
+ if names:
83
+ for name in names:
84
+ kbs=KnowledgebaseService.query(name=name,tenant_id=tenant_id)
85
+ if not kbs:
86
+ return get_error_data_result(retmsg=f"You don't own the dataset {name}")
87
+ id_list.append(kbs[0].id)
88
+ if ids:
89
+ for id in ids:
90
+ kbs=KnowledgebaseService.query(id=id,tenant_id=tenant_id)
91
+ if not kbs:
92
+ return get_error_data_result(retmsg=f"You don't own the dataset {id}")
93
+ id_list.extend(ids)
94
+ for id in id_list:
95
+ for doc in DocumentService.query(kb_id=id):
96
+ if not DocumentService.remove_document(doc, tenant_id):
97
+ return get_error_data_result(
98
+ retmsg="Remove document error.(Database error)")
99
+ f2d = File2DocumentService.get_by_document_id(doc.id)
100
+ FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
101
+ File2DocumentService.delete_by_document_id(doc.id)
102
+ if not KnowledgebaseService.delete_by_id(id):
103
+ return get_error_data_result(
104
+ retmsg="Delete dataset error.(Database serror)")
105
+ return get_result(retcode=RetCode.SUCCESS)
106
+
107
+ @manager.route('/dataset/<dataset_id>', methods=['PUT'])
108
+ @token_required
109
+ def update(tenant_id,dataset_id):
110
+ if not KnowledgebaseService.query(id=dataset_id,tenant_id=tenant_id):
111
+ return get_error_data_result(retmsg="You don't own the dataset")
112
+ req = request.json
113
+ e, t = TenantService.get_by_id(tenant_id)
114
+ invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
115
+ if any(key in req for key in invalid_keys):
116
+ return get_error_data_result(retmsg="The input parameters are invalid.")
117
+ if "tenant_id" in req:
118
+ if req["tenant_id"] != tenant_id:
119
+ return get_error_data_result(
120
+ retmsg="Can't change tenant_id.")
121
+ if "embedding_model" in req:
122
+ if req["embedding_model"] != t.embd_id:
123
+ return get_error_data_result(
124
+ retmsg="Can't change embedding_model.")
125
+ req.pop("embedding_model")
126
+ e, kb = KnowledgebaseService.get_by_id(dataset_id)
127
+ if "chunk_count" in req:
128
+ if req["chunk_count"] != kb.chunk_num:
129
+ return get_error_data_result(
130
+ retmsg="Can't change chunk_count.")
131
+ req.pop("chunk_count")
132
+ if "document_count" in req:
133
+ if req['document_count'] != kb.doc_num:
134
+ return get_error_data_result(
135
+ retmsg="Can't change document_count.")
136
+ req.pop("document_count")
137
+ if "parse_method" in req:
138
+ if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
139
+ return get_error_data_result(
140
+ retmsg="If chunk count is not 0, parse method is not changable.")
141
+ req['parser_id'] = req.pop('parse_method')
142
+ if "name" in req:
143
+ req["name"] = req["name"].strip()
144
+ if req["name"].lower() != kb.name.lower() \
145
+ and len(KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id,
146
+ status=StatusEnum.VALID.value)) > 0:
147
+ return get_error_data_result(
148
+ retmsg="Duplicated knowledgebase name in updating dataset.")
149
+ if not KnowledgebaseService.update_by_id(kb.id, req):
150
+ return get_error_data_result(retmsg="Update dataset error.(Database error)")
151
+ return get_result(retcode=RetCode.SUCCESS)
152
+
153
+ @manager.route('/dataset', methods=['GET'])
154
  @token_required
155
+ def list(tenant_id):
156
+ id = request.args.get("id")
157
+ name = request.args.get("name")
158
+ kbs = KnowledgebaseService.query(id=id,name=name,status=1)
159
+ if not kbs:
160
+ return get_error_data_result(retmsg="The dataset doesn't exist")
161
  page_number = int(request.args.get("page", 1))
162
  items_per_page = int(request.args.get("page_size", 1024))
163
  orderby = request.args.get("orderby", "create_time")
164
  desc = bool(request.args.get("desc", True))
165
  tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
166
+ kbs = KnowledgebaseService.get_list(
167
+ [m["tenant_id"] for m in tenants], tenant_id, page_number, items_per_page, orderby, desc, id, name)
168
  renamed_list = []
169
  for kb in kbs:
170
  key_mapping = {
 
178
  new_key = key_mapping.get(key, key)
179
  renamed_data[new_key] = value
180
  renamed_list.append(renamed_data)
181
+ return get_result(data=renamed_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/db/services/knowledgebase_service.py CHANGED
@@ -142,3 +142,27 @@ class KnowledgebaseService(CommonService):
142
  @DB.connection_context()
143
  def get_all_ids(cls):
144
  return [m["id"] for m in cls.model.select(cls.model.id).dicts()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  @DB.connection_context()
143
  def get_all_ids(cls):
144
  return [m["id"] for m in cls.model.select(cls.model.id).dicts()]
145
+
146
+ @classmethod
147
+ @DB.connection_context()
148
+ def get_list(cls, joined_tenant_ids, user_id,
149
+ page_number, items_per_page, orderby, desc, id , name):
150
+ kbs = cls.model.select()
151
+ if id:
152
+ kbs = kbs.where(cls.model.id == id)
153
+ if name:
154
+ kbs = kbs.where(cls.model.name == name)
155
+ kbs = kbs.where(
156
+ ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
157
+ TenantPermission.TEAM.value)) | (
158
+ cls.model.tenant_id == user_id))
159
+ & (cls.model.status == StatusEnum.VALID.value)
160
+ )
161
+ if desc:
162
+ kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
163
+ else:
164
+ kbs = kbs.order_by(cls.model.getter_by(orderby).asc())
165
+
166
+ kbs = kbs.paginate(page_number, items_per_page)
167
+
168
+ return list(kbs.dicts())
api/http_api.md CHANGED
@@ -5,63 +5,134 @@
5
 
6
  **POST** `/api/v1/dataset`
7
 
8
- Creates a dataset with a name. If dataset of the same name already exists, the new dataset will be renamed by RAGFlow automatically.
9
 
10
  ### Request
11
 
12
  - Method: POST
13
- - URL: `/api/v1/dataset`
14
  - Headers:
15
  - `content-Type: application/json`
16
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
17
  - Body:
18
- - `"dataset_name"`: `string`
 
 
19
  - `"tenant_id"`: `string`
 
 
20
  - `"embedding_model"`: `string`
21
- - `"chunk_count"`: `integer`
22
  - `"document_count"`: `integer`
 
23
  - `"parse_method"`: `string`
 
24
 
25
  #### Request example
26
 
27
- ```shell
 
 
 
 
 
28
  curl --request POST \
29
- --url http://{address}/api/v1/dataset \
30
- --header 'Content-Type: application/json' \
31
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
32
- --data-binary '{
33
- "dataset_name": "test",
34
- "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
35
- "embedding_model": "BAAI/bge--zh-v1.5",
36
- "chunk_count": 0,
37
- "document_count": 0,
38
- "parse_method": "general"
39
  }'
40
  ```
41
 
42
  #### Request parameters
43
 
44
- - `"dataset_name"`: (*Body parameter*)
 
 
 
 
45
  The name of the dataset, which must adhere to the following requirements:
46
- - Maximum 65,535 characters.
 
 
 
 
 
47
  - `"tenant_id"`: (*Body parameter*)
48
- The ID of the tenant.
 
 
 
 
 
 
 
 
 
49
  - `"embedding_model"`: (*Body parameter*)
50
- Embedding model used in the dataset.
51
- - `"chunk_count"`: (*Body parameter*)
52
- Chunk count of the dataset.
 
 
 
 
53
  - `"document_count"`: (*Body parameter*)
54
- Document count of the dataset.
55
- - `"parse_mehtod"`: (*Body parameter*)
56
- Parsing method of the dataset.
 
 
 
 
 
 
 
 
 
 
57
 
58
  ### Response
59
 
60
  The successful response includes a JSON object like the following:
61
 
62
- ```shell
63
  {
64
- "code": 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  }
66
  ```
67
 
@@ -71,10 +142,10 @@ The successful response includes a JSON object like the following:
71
 
72
  The error response includes a JSON object like the following:
73
 
74
- ```shell
75
  {
76
- "code": 3016,
77
- "message": "Can't connect database"
78
  }
79
  ```
80
 
@@ -82,27 +153,31 @@ The error response includes a JSON object like the following:
82
 
83
  **DELETE** `/api/v1/dataset`
84
 
85
- Deletes a dataset by its id or name.
86
 
87
  ### Request
88
 
89
  - Method: DELETE
90
- - URL: `/api/v1/dataset/{dataset_id}`
91
  - Headers:
92
  - `content-Type: application/json`
93
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
 
94
 
95
 
96
  #### Request example
97
 
98
- ```shell
 
99
  curl --request DELETE \
100
- --url http://{address}/api/v1/dataset/0 \
101
- --header 'Content-Type: application/json' \
102
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
103
- --data ' {
104
- "names": ["ds1", "ds2"]
105
- }'
106
  ```
107
 
108
  #### Request parameters
@@ -118,7 +193,7 @@ curl --request DELETE \
118
 
119
  The successful response includes a JSON object like the following:
120
 
121
- ```shell
122
  {
123
  "code": 0
124
  }
@@ -130,10 +205,10 @@ The successful response includes a JSON object like the following:
130
 
131
  The error response includes a JSON object like the following:
132
 
133
- ```shell
134
  {
135
- "code": 3016,
136
- "message": "Try to delete non-existent dataset."
137
  }
138
  ```
139
 
@@ -146,50 +221,47 @@ Updates a dataset by its id.
146
  ### Request
147
 
148
  - Method: PUT
149
- - URL: `/api/v1/dataset/{dataset_id}`
150
  - Headers:
151
  - `content-Type: application/json`
152
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
153
 
154
 
155
  #### Request example
156
 
157
- ```shell
 
 
 
 
 
 
 
 
158
  curl --request PUT \
159
- --url http://{address}/api/v1/dataset/0 \
160
- --header 'Content-Type: application/json' \
161
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
162
- --data-binary '{
163
- "dataset_name": "test",
164
- "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
165
- "embedding_model": "BAAI/bge--zh-v1.5",
166
- "chunk_count": 0,
167
- "document_count": 0,
168
- "parse_method": "general"
169
  }'
170
  ```
171
 
172
  #### Request parameters
 
173
 
174
- - `"dataset_name"`: (*Body parameter*)
175
- The name of the dataset, which must adhere to the following requirements:
176
- - Maximum 65,535 characters.
177
- - `"tenant_id"`: (*Body parameter*)
178
- The ID of the tenant.
179
- - `"embedding_model"`: (*Body parameter*)
180
- Embedding model used in the dataset.
181
- - `"chunk_count"`: (*Body parameter*)
182
- Chunk count of the dataset.
183
- - `"document_count"`: (*Body parameter*)
184
- Document count of the dataset.
185
- - `"parse_mehtod"`: (*Body parameter*)
186
- Parsing method of the dataset.
187
 
188
  ### Response
189
 
190
  The successful response includes a JSON object like the following:
191
 
192
- ```shell
193
  {
194
  "code": 0
195
  }
@@ -201,35 +273,37 @@ The successful response includes a JSON object like the following:
201
 
202
  The error response includes a JSON object like the following:
203
 
204
- ```shell
205
  {
206
- "code": 3016,
207
- "message": "Can't change embedding model since some files already use it."
208
  }
209
  ```
210
 
211
  ## List datasets
212
 
213
- **GET** `/api/v1/dataset?name={name}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
214
 
215
  List all datasets
216
 
217
  ### Request
218
 
219
  - Method: GET
220
- - URL: `/api/v1/dataset?name={name}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
221
  - Headers:
222
- - `content-Type: application/json`
223
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
224
 
225
 
226
  #### Request example
227
 
228
- ```shell
 
 
 
 
229
  curl --request GET \
230
- --url http://{address}/api/v1/dataset?page=0&page_size=50&orderby=create_time&desc=false \
231
- --header 'Content-Type: application/json' \
232
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
233
  ```
234
 
235
  #### Request parameters
@@ -244,54 +318,63 @@ curl --request GET \
244
  A boolean flag indicating whether the sorting should be in descending order.
245
  - `name`: (*Path parameter*)
246
  Dataset name
 
 
 
 
247
 
248
  ### Response
249
 
250
  The successful response includes a JSON object like the following:
251
 
252
- ```shell
253
  {
254
  "code": 0,
255
  "data": [
256
  {
257
- "avatar": "",
258
- "chunk_count": 0,
259
- "create_date": "Thu, 29 Aug 2024 03:13:07 GMT",
260
- "create_time": 1724901187843,
261
- "created_by": "4fb0cd625f9311efba4a0242ac120006",
262
- "description": "",
263
- "document_count": 0,
264
- "embedding_model": "BAAI/bge-large-zh-v1.5",
265
- "id": "9d3d906665b411ef87d10242ac120006",
266
- "language": "English",
267
- "name": "Test",
268
- "parser_config": {
269
- "chunk_token_count": 128,
270
- "delimiter": "\n!?。;!?",
271
- "layout_recognize": true,
272
- "task_page_size": 12
273
- },
274
- "parse_method": "naive",
275
- "permission": "me",
276
- "similarity_threshold": 0.2,
277
- "status": "1",
278
- "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
279
- "token_count": 0,
280
- "update_date": "Thu, 29 Aug 2024 03:13:07 GMT",
281
- "update_time": 1724901187843,
282
- "vector_similarity_weight": 0.3
 
 
 
 
 
283
  }
284
- ],
285
  }
286
  ```
287
 
288
 
289
  The error response includes a JSON object like the following:
290
 
291
- ```shell
292
  {
293
- "code": 3016,
294
- "message": "Can't access database to get the dataset list."
295
  }
296
  ```
297
 
 
5
 
6
  **POST** `/api/v1/dataset`
7
 
8
+ Creates a dataset.
9
 
10
  ### Request
11
 
12
  - Method: POST
13
+ - URL: `http://{address}/api/v1/dataset`
14
  - Headers:
15
  - `content-Type: application/json`
16
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
17
  - Body:
18
+ - `"id"`: `string`
19
+ - `"name"`: `string`
20
+ - `"avatar"`: `string`
21
  - `"tenant_id"`: `string`
22
+ - `"description"`: `string`
23
+ - `"language"`: `string`
24
  - `"embedding_model"`: `string`
25
+ - `"permission"`: `string`
26
  - `"document_count"`: `integer`
27
+ - `"chunk_count"`: `integer`
28
  - `"parse_method"`: `string`
29
+ - `"parser_config"`: `Dataset.ParserConfig`
30
 
31
  #### Request example
32
 
33
+ ```bash
34
+ # "id": id must not be provided.
35
+ # "name": name is required and can't be duplicated.
36
+ # "tenant_id": tenant_id must not be provided.
37
+ # "embedding_model": embedding_model must not be provided.
38
+ # "naive" means general.
39
  curl --request POST \
40
+ --url http://{address}/api/v1/dataset \
41
+ --header 'Content-Type: application/json' \
42
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
43
+ --data '{
44
+ "name": "test",
45
+ "chunk_count": 0,
46
+ "document_count": 0,
47
+ "parse_method": "naive"
 
 
48
  }'
49
  ```
50
 
51
  #### Request parameters
52
 
53
+ - `"id"`: (*Body parameter*)
54
+ The ID of the created dataset used to uniquely identify different datasets.
55
+ - If creating a dataset, `id` must not be provided.
56
+
57
+ - `"name"`: (*Body parameter*)
58
  The name of the dataset, which must adhere to the following requirements:
59
+ - Required when creating a dataset and must be unique.
60
+ - If updating a dataset, `name` must still be unique.
61
+
62
+ - `"avatar"`: (*Body parameter*)
63
+ Base64 encoding of the avatar.
64
+
65
  - `"tenant_id"`: (*Body parameter*)
66
+ The ID of the tenant associated with the dataset, used to link it with specific users.
67
+ - If creating a dataset, `tenant_id` must not be provided.
68
+ - If updating a dataset, `tenant_id` cannot be changed.
69
+
70
+ - `"description"`: (*Body parameter*)
71
+ The description of the dataset.
72
+
73
+ - `"language"`: (*Body parameter*)
74
+ The language setting for the dataset.
75
+
76
  - `"embedding_model"`: (*Body parameter*)
77
+ Embedding model used in the dataset to generate vector embeddings.
78
+ - If creating a dataset, `embedding_model` must not be provided.
79
+ - If updating a dataset, `embedding_model` cannot be changed.
80
+
81
+ - `"permission"`: (*Body parameter*)
82
+ Specifies who can manipulate the dataset.
83
+
84
  - `"document_count"`: (*Body parameter*)
85
+ Document count of the dataset.
86
+ - If updating a dataset, `document_count` cannot be changed.
87
+
88
+ - `"chunk_count"`: (*Body parameter*)
89
+ Chunk count of the dataset.
90
+ - If updating a dataset, `chunk_count` cannot be changed.
91
+
92
+ - `"parse_method"`: (*Body parameter*)
93
+ Parsing method of the dataset.
94
+ - If updating `parse_method`, `chunk_count` must be 0.
95
+
96
+ - `"parser_config"`: (*Body parameter*)
97
+ The configuration settings for the dataset parser.
98
 
99
  ### Response
100
 
101
  The successful response includes a JSON object like the following:
102
 
103
+ ```json
104
  {
105
+ "code": 0,
106
+ "data": {
107
+ "avatar": null,
108
+ "chunk_count": 0,
109
+ "create_date": "Thu, 10 Oct 2024 05:57:37 GMT",
110
+ "create_time": 1728539857641,
111
+ "created_by": "69736c5e723611efb51b0242ac120007",
112
+ "description": null,
113
+ "document_count": 0,
114
+ "embedding_model": "BAAI/bge-large-zh-v1.5",
115
+ "id": "8d73076886cc11ef8c270242ac120006",
116
+ "language": "English",
117
+ "name": "test_1",
118
+ "parse_method": "naive",
119
+ "parser_config": {
120
+ "pages": [
121
+ [
122
+ 1,
123
+ 1000000
124
+ ]
125
+ ]
126
+ },
127
+ "permission": "me",
128
+ "similarity_threshold": 0.2,
129
+ "status": "1",
130
+ "tenant_id": "69736c5e723611efb51b0242ac120007",
131
+ "token_num": 0,
132
+ "update_date": "Thu, 10 Oct 2024 05:57:37 GMT",
133
+ "update_time": 1728539857641,
134
+ "vector_similarity_weight": 0.3
135
+ }
136
  }
137
  ```
138
 
 
142
 
143
  The error response includes a JSON object like the following:
144
 
145
+ ```json
146
  {
147
+ "code": 102,
148
+ "message": "Duplicated knowledgebase name in creating dataset."
149
  }
150
  ```
151
 
 
153
 
154
  **DELETE** `/api/v1/dataset`
155
 
156
+ Deletes datasets by ids or names.
157
 
158
  ### Request
159
 
160
  - Method: DELETE
161
+ - URL: `http://{address}/api/v1/dataset`
162
  - Headers:
163
  - `content-Type: application/json`
164
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
165
+ - Body:
166
+ - `"names"`: `List[string]`
167
+ - `"ids"`: `List[string]`
168
 
169
 
170
  #### Request example
171
 
172
+ ```bash
173
+ # Either id or name must be provided, but not both.
174
  curl --request DELETE \
175
+ --url http://{address}/api/v1/dataset \
176
+ --header 'Content-Type: application/json' \
177
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
178
+ --data '{
179
+ "names": ["test_1", "test_2"]
180
+ }'
181
  ```
182
 
183
  #### Request parameters
 
193
 
194
  The successful response includes a JSON object like the following:
195
 
196
+ ```json
197
  {
198
  "code": 0
199
  }
 
205
 
206
  The error response includes a JSON object like the following:
207
 
208
+ ```json
209
  {
210
+ "code": 102,
211
+ "message": "You don't own the dataset."
212
  }
213
  ```
214
 
 
221
  ### Request
222
 
223
  - Method: PUT
224
+ - URL: `http://{address}/api/v1/dataset/{dataset_id}`
225
  - Headers:
226
  - `content-Type: application/json`
227
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
228
+ - Body: (Refer to the "Create Dataset" for the complete structure of the request body.)
229
 
230
 
231
  #### Request example
232
 
233
+ ```bash
234
+ # "id": id is required.
235
+ # "name": If you update name, it can't be duplicated.
236
+ # "tenant_id": If you update tenant_id, it can't be changed.
237
+ # "embedding_model": If you update embedding_model, it can't be changed.
238
+ # "chunk_count": If you update chunk_count, it can't be changed.
239
+ # "document_count": If you update document_count, it can't be changed.
240
+ # "parse_method": If you update parse_method, chunk_count must be 0.
241
+ # "naive" means general.
242
  curl --request PUT \
243
+ --url http://{address}/api/v1/dataset/{dataset_id} \
244
+ --header 'Content-Type: application/json' \
245
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
246
+ --data '{
247
+ "name": "test",
248
+ "tenant_id": "4fb0cd625f9311efba4a0242ac120006",
249
+ "embedding_model": "BAAI/bge-zh-v1.5",
250
+ "chunk_count": 0,
251
+ "document_count": 0,
252
+ "parse_method": "naive"
253
  }'
254
  ```
255
 
256
  #### Request parameters
257
+ (Refer to the "Create Dataset" for the complete structure of the request parameters.)
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
  ### Response
261
 
262
  The successful response includes a JSON object like the following:
263
 
264
+ ```json
265
  {
266
  "code": 0
267
  }
 
273
 
274
  The error response includes a JSON object like the following:
275
 
276
+ ```json
277
  {
278
+ "code": 102,
279
+ "message": "Can't change tenant_id."
280
  }
281
  ```
282
 
283
  ## List datasets
284
 
285
+ **GET** `/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
286
 
287
  List all datasets
288
 
289
  ### Request
290
 
291
  - Method: GET
292
+ - URL: `http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id}`
293
  - Headers:
 
294
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
295
 
296
 
297
  #### Request example
298
 
299
+ ```bash
300
+ # If no page parameter is passed, the default is 1
301
+ # If no page_size parameter is passed, the default is 1024
302
+ # If no order_by parameter is passed, the default is "create_time"
303
+ # If no desc parameter is passed, the default is True
304
  curl --request GET \
305
+ --url http://{address}/api/v1/dataset?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={dataset_name}&id={dataset_id} \
306
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
307
  ```
308
 
309
  #### Request parameters
 
318
  A boolean flag indicating whether the sorting should be in descending order.
319
  - `name`: (*Path parameter*)
320
  Dataset name
321
+ - `"id"`: (*Path parameter*)
322
+ The ID of the dataset to be retrieved.
323
+ - `"name"`: (*Path parameter*)
324
+ The name of the dataset to be retrieved.
325
 
326
  ### Response
327
 
328
  The successful response includes a JSON object like the following:
329
 
330
+ ```json
331
  {
332
  "code": 0,
333
  "data": [
334
  {
335
+ "avatar": "",
336
+ "chunk_count": 59,
337
+ "create_date": "Sat, 14 Sep 2024 01:12:37 GMT",
338
+ "create_time": 1726276357324,
339
+ "created_by": "69736c5e723611efb51b0242ac120007",
340
+ "description": null,
341
+ "document_count": 1,
342
+ "embedding_model": "BAAI/bge-large-zh-v1.5",
343
+ "id": "6e211ee0723611efa10a0242ac120007",
344
+ "language": "English",
345
+ "name": "mysql",
346
+ "parse_method": "knowledge_graph",
347
+ "parser_config": {
348
+ "chunk_token_num": 8192,
349
+ "delimiter": "\\n!?;。;!?",
350
+ "entity_types": [
351
+ "organization",
352
+ "person",
353
+ "location",
354
+ "event",
355
+ "time"
356
+ ]
357
+ },
358
+ "permission": "me",
359
+ "similarity_threshold": 0.2,
360
+ "status": "1",
361
+ "tenant_id": "69736c5e723611efb51b0242ac120007",
362
+ "token_num": 12744,
363
+ "update_date": "Thu, 10 Oct 2024 04:07:23 GMT",
364
+ "update_time": 1728533243536,
365
+ "vector_similarity_weight": 0.3
366
  }
367
+ ]
368
  }
369
  ```
370
 
371
 
372
  The error response includes a JSON object like the following:
373
 
374
+ ```json
375
  {
376
+ "code": 102,
377
+ "message": "The dataset doesn't exist"
378
  }
379
  ```
380
 
api/python_api_reference.md CHANGED
@@ -38,9 +38,9 @@ The unique name of the dataset to create. It must adhere to the following requir
38
 
39
  #### avatar: `str`
40
 
41
- The url or ???????????????????????? path to the avatar image associated with the created dataset. Defaults to `""`
42
 
43
- #### tenant_id: `str` ?????????????????
44
 
45
  The id of the tenant associated with the created dataset is used to identify different users. Defaults to `None`.
46
 
@@ -55,9 +55,9 @@ The description of the created dataset. Defaults to `""`.
55
 
56
  The language setting of the created dataset. Defaults to `"English"`. ????????????
57
 
58
- #### embedding_model: `str` ????????????????
59
 
60
- The specific model or algorithm used by the dataset to generate vector embeddings. Defaults to `""`.
61
 
62
  - If creating a dataset, embedding_model must not be provided.
63
  - If updating a dataset, embedding_model can't be changed.
@@ -89,12 +89,10 @@ The method used by the dataset to parse and process data.
89
  The configuration settings for the parser used by the dataset.
90
 
91
  ### Returns
92
-
93
- - Success: An `infinity.local_infinity.table.LocalTable` object in Python module mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
94
- - Failure: `InfinityException`
95
- - `error_code`: `int` A non-zero value indicating a specific error condition.
96
- - `error_msg`: `str` A message providing additional details about the error.
97
-
98
  ### Examples
99
 
100
  ```python
@@ -106,19 +104,28 @@ ds = rag.create_dataset(name="kb_1")
106
 
107
  ---
108
 
109
- ## Delete knowledge base
110
 
111
  ```python
112
- DataSet.delete() -> bool
113
  ```
 
 
114
 
115
- Deletes a knowledge base.
116
 
117
- ### Returns
 
 
118
 
119
- `bool`
120
 
121
- description:the case of updating an dateset, `True` or `False`.
 
 
 
 
 
122
 
123
  ### Examples
124
 
@@ -126,8 +133,8 @@ description:the case of updating an dateset, `True` or `False`.
126
  from ragflow import RAGFlow
127
 
128
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
129
- ds = rag.create_dataset(name="kb_1")
130
- ds.delete()
131
  ```
132
 
133
  ---
@@ -139,7 +146,9 @@ RAGFlow.list_datasets(
139
  page: int = 1,
140
  page_size: int = 1024,
141
  orderby: str = "create_time",
142
- desc: bool = True
 
 
143
  ) -> List[DataSet]
144
  ```
145
 
@@ -163,51 +172,19 @@ The field by which the records should be sorted. This specifies the attribute or
163
 
164
  Whether the sorting should be in descending order. Defaults to `True`.
165
 
166
- ### Returns
167
-
168
- ```python
169
- List[DataSet]
170
- description:the list of datasets.
171
- ```
172
-
173
- ### Examples
174
-
175
- ```python
176
- from ragflow import RAGFlow
177
-
178
- rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
179
- for ds in rag.list_datasets():
180
- print(ds)
181
- ```
182
-
183
- ---
184
-
185
- ## Retrieve knowledge base
186
-
187
- ```python
188
- RAGFlow.get_dataset(
189
- id: str = None,
190
- name: str = None
191
- ) -> DataSet
192
- ```
193
 
194
- Retrieves a knowledge base by name.
195
-
196
- ### Parameters
197
 
198
  #### name: `str`
199
 
200
- The name of the dataset to be got. If `id` is not provided, `name` is required.
201
-
202
- #### id: `str`
203
-
204
- The id of the dataset to be got. If `name` is not provided, `id` is required.
205
 
206
  ### Returns
207
 
208
  ```python
209
- DataSet
210
- description: dataset object
211
  ```
212
 
213
  ### Examples
@@ -216,23 +193,23 @@ description: dataset object
216
  from ragflow import RAGFlow
217
 
218
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
219
- ds = rag.get_dataset(name="ragflow")
220
- print(ds)
221
  ```
222
 
223
  ---
224
 
225
- ## Save knowledge base configurations
 
226
 
227
  ```python
228
- DataSet.save() -> bool
229
  ```
230
 
231
  ### Returns
232
 
233
  ```python
234
- bool
235
- description:the case of updating an dateset, True or False.
236
  ```
237
 
238
  ### Examples
@@ -242,8 +219,7 @@ from ragflow import RAGFlow
242
 
243
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
244
  ds = rag.get_dataset(name="kb_1")
245
- ds.parse_method = "manual"
246
- ds.save()
247
  ```
248
 
249
  ---
 
38
 
39
  #### avatar: `str`
40
 
41
+ Base64 encoding of the avatar. Defaults to `""`.
42
 
43
+ #### tenant_id: `str`
44
 
45
  The id of the tenant associated with the created dataset is used to identify different users. Defaults to `None`.
46
 
 
55
 
56
  The language setting of the created dataset. Defaults to `"English"`.
57
 
58
+ #### embedding_model: `str`
59
 
60
+ The specific model used by the dataset to generate vector embeddings. Defaults to `""`.
61
 
62
  - If creating a dataset, embedding_model must not be provided.
63
  - If updating a dataset, embedding_model can't be changed.
 
89
  The configuration settings for the parser used by the dataset.
90
 
91
  ### Returns
92
+ ```python
93
+ DataSet
94
+ description: dataset object
95
+ ```
 
 
96
  ### Examples
97
 
98
  ```python
 
104
 
105
  ---
106
 
107
+ ## Delete knowledge bases
108
 
109
  ```python
110
+ RAGFlow.delete_dataset(ids: List[str] = None, names: List[str] = None)
111
  ```
112
+ Deletes knowledge bases.
113
+ ### Parameters
114
 
115
+ #### ids: `List[str]`
116
 
117
+ The ids of the datasets to be deleted.
118
+
119
+ #### names: `List[str]`
120
 
121
+ The names of the datasets to be deleted.
122
 
123
+ Either `ids` or `names` must be provided, but not both.
124
+ ### Returns
125
+
126
+ ```python
127
+ no return
128
+ ```
129
 
130
  ### Examples
131
 
 
133
  from ragflow import RAGFlow
134
 
135
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
136
+ rag.delete_dataset(names=["name_1","name_2"])
137
+ rag.delete_dataset(ids=["id_1","id_2"])
138
  ```
139
 
140
  ---
 
146
  page: int = 1,
147
  page_size: int = 1024,
148
  orderby: str = "create_time",
149
+ desc: bool = True,
150
+ id: str = None,
151
+ name: str = None
152
  ) -> List[DataSet]
153
  ```
154
 
 
172
 
173
  Whether the sorting should be in descending order. Defaults to `True`.
174
 
175
+ #### id: `str`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
+ The ID of the dataset to be retrieved. Defaults to `None`.
 
 
178
 
179
  #### name: `str`
180
 
181
+ The name of the dataset to be retrieved. Defaults to `None`.
 
 
 
 
182
 
183
  ### Returns
184
 
185
  ```python
186
+ List[DataSet]
187
+ description: the list of datasets.
188
  ```
189
 
190
  ### Examples
 
193
  from ragflow import RAGFlow
194
 
195
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
196
+ for ds in rag.list_datasets():
197
+ print(ds)
198
  ```
199
 
200
  ---
201
 
202
+
203
+ ## Update knowledge base
204
 
205
  ```python
206
+ DataSet.update(update_message: dict)
207
  ```
208
 
209
  ### Returns
210
 
211
  ```python
212
+ no return
 
213
  ```
214
 
215
  ### Examples
 
219
 
220
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
221
  ds = rag.get_dataset(name="kb_1")
222
+ ds.update({"parse_method": "manual", ...})
 
223
  ```
224
 
225
  ---
api/utils/api_utils.py CHANGED
@@ -268,3 +268,32 @@ def token_required(func):
268
  return func(*args, **kwargs)
269
 
270
  return decorated_function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  return func(*args, **kwargs)
269
 
270
  return decorated_function
271
+
272
+ def get_result(retcode=RetCode.SUCCESS, retmsg='error', data=None):
273
+ if retcode == 0:
274
+ if data is not None:
275
+ response = {"code": retcode, "data": data}
276
+ else:
277
+ response = {"code": retcode}
278
+ else:
279
+ response = {"code": retcode, "message": retmsg}
280
+ return jsonify(response)
281
+
282
+ def get_error_data_result(retcode=RetCode.DATA_ERROR,
283
+ retmsg='Sorry! Data missing!'):
284
+ import re
285
+ result_dict = {
286
+ "code": retcode,
287
+ "message": re.sub(
288
+ r"rag",
289
+ "seceum",
290
+ retmsg,
291
+ flags=re.IGNORECASE)}
292
+ response = {}
293
+ for key, value in result_dict.items():
294
+ if value is None and key != "code":
295
+ continue
296
+ else:
297
+ response[key] = value
298
+ return jsonify(response)
299
+
sdk/python/ragflow/modules/base.py CHANGED
@@ -30,5 +30,9 @@ class Base(object):
30
  res = self.rag.delete(path, params)
31
  return res
32
 
 
 
 
 
33
  def __str__(self):
34
  return str(self.to_json())
 
30
  res = self.rag.delete(path, params)
31
  return res
32
 
33
+ def put(self,path, json):
34
+ res = self.rag.put(path,json)
35
+ return res
36
+
37
  def __str__(self):
38
  return str(self.to_json())
sdk/python/ragflow/modules/dataset.py CHANGED
@@ -32,24 +32,13 @@ class DataSet(Base):
32
  res_dict.pop(k)
33
  super().__init__(rag, res_dict)
34
 
35
- def save(self) -> bool:
36
- res = self.post('/dataset/save',
37
- {"id": self.id, "name": self.name, "avatar": self.avatar, "tenant_id": self.tenant_id,
38
- "description": self.description, "language": self.language, "embedding_model": self.embedding_model,
39
- "permission": self.permission,
40
- "document_count": self.document_count, "chunk_count": self.chunk_count, "parse_method": self.parse_method,
41
- "parser_config": self.parser_config.to_json()
42
- })
43
  res = res.json()
44
- if res.get("retmsg") == "success": return True
45
- raise Exception(res["retmsg"])
46
 
47
- def delete(self) -> bool:
48
- res = self.rm('/dataset/delete',
49
- {"id": self.id})
50
- res = res.json()
51
- if res.get("retmsg") == "success": return True
52
- raise Exception(res["retmsg"])
53
 
54
  def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
55
  """
 
32
  res_dict.pop(k)
33
  super().__init__(rag, res_dict)
34
 
35
+ def update(self, update_message: dict):
36
+ res = self.put(f'/dataset/{self.id}',
37
+ update_message)
 
 
 
 
 
38
  res = res.json()
39
+ if res.get("code") != 0:
40
+ raise Exception(res["message"])
41
 
 
 
 
 
 
 
42
 
43
  def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
44
  """
sdk/python/ragflow/ragflow.py CHANGED
@@ -18,9 +18,9 @@ from typing import List
18
  import requests
19
 
20
  from .modules.assistant import Assistant
 
21
  from .modules.dataset import DataSet
22
  from .modules.document import Document
23
- from .modules.chunk import Chunk
24
 
25
 
26
  class RAGFlow:
@@ -41,7 +41,11 @@ class RAGFlow:
41
  return res
42
 
43
  def delete(self, path, params):
44
- res = requests.delete(url=self.api_url + path, params=params, headers=self.authorization_header)
 
 
 
 
45
  return res
46
 
47
  def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
@@ -52,7 +56,7 @@ class RAGFlow:
52
  parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
53
  "delimiter": "\n!?。;!?", "task_page_size": 12})
54
  parser_config = parser_config.to_json()
55
- res = self.post("/dataset/save",
56
  {"name": name, "avatar": avatar, "description": description, "language": language,
57
  "permission": permission,
58
  "document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
@@ -60,27 +64,28 @@ class RAGFlow:
60
  }
61
  )
62
  res = res.json()
63
- if res.get("retmsg") == "success":
64
  return DataSet(self, res["data"])
65
- raise Exception(res["retmsg"])
 
 
 
 
 
 
66
 
67
- def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True) -> \
 
68
  List[DataSet]:
69
- res = self.get("/dataset/list", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc})
 
70
  res = res.json()
71
  result_list = []
72
- if res.get("retmsg") == "success":
73
  for data in res['data']:
74
  result_list.append(DataSet(self, data))
75
  return result_list
76
- raise Exception(res["retmsg"])
77
-
78
- def get_dataset(self, id: str = None, name: str = None) -> DataSet:
79
- res = self.get("/dataset/detail", {"id": id, "name": name})
80
- res = res.json()
81
- if res.get("retmsg") == "success":
82
- return DataSet(self, res['data'])
83
- raise Exception(res["retmsg"])
84
 
85
  def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
86
  llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
@@ -272,4 +277,3 @@ class RAGFlow:
272
  except Exception as e:
273
  print(f"An error occurred during retrieval: {e}")
274
  raise
275
-
 
18
  import requests
19
 
20
  from .modules.assistant import Assistant
21
+ from .modules.chunk import Chunk
22
  from .modules.dataset import DataSet
23
  from .modules.document import Document
 
24
 
25
 
26
  class RAGFlow:
 
41
  return res
42
 
43
  def delete(self, path, params):
44
+ res = requests.delete(url=self.api_url + path, json=params, headers=self.authorization_header)
45
+ return res
46
+
47
+ def put(self, path, json):
48
+ res = requests.put(url=self.api_url + path, json= json,headers=self.authorization_header)
49
  return res
50
 
51
  def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
 
56
  parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
57
  "delimiter": "\n!?。;!?", "task_page_size": 12})
58
  parser_config = parser_config.to_json()
59
+ res = self.post("/dataset",
60
  {"name": name, "avatar": avatar, "description": description, "language": language,
61
  "permission": permission,
62
  "document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
 
64
  }
65
  )
66
  res = res.json()
67
+ if res.get("code") == 0:
68
  return DataSet(self, res["data"])
69
+ raise Exception(res["message"])
70
+
71
+ def delete_dataset(self, ids: List[str] = None, names: List[str] = None):
72
+ res = self.delete("/dataset",{"ids": ids, "names": names})
73
+ res=res.json()
74
+ if res.get("code") != 0:
75
+ raise Exception(res["message"])
76
 
77
+ def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
78
+ id: str = None, name: str = None) -> \
79
  List[DataSet]:
80
+ res = self.get("/dataset",
81
+ {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
82
  res = res.json()
83
  result_list = []
84
+ if res.get("code") == 0:
85
  for data in res['data']:
86
  result_list.append(DataSet(self, data))
87
  return result_list
88
+ raise Exception(res["message"])
 
 
 
 
 
 
 
89
 
90
  def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
91
  llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
 
277
  except Exception as e:
278
  print(f"An error occurred during retrieval: {e}")
279
  raise
 
sdk/python/test/common.py CHANGED
@@ -1,4 +1,4 @@
1
 
2
 
3
- API_KEY = 'ragflow-k0YzUxMGY4NjY5YTExZWY5MjI5MDI0Mm'
4
  HOST_ADDRESS = 'http://127.0.0.1:9380'
 
1
 
2
 
3
+ API_KEY = 'ragflow-NiYmZjNTVjODYwNzExZWZiODEwMDI0Mm'
4
  HOST_ADDRESS = 'http://127.0.0.1:9380'
sdk/python/test/t_dataset.py CHANGED
@@ -24,9 +24,8 @@ class TestDataset(TestSdk):
24
  ds = rag.create_dataset("ABC")
25
  if isinstance(ds, DataSet):
26
  assert ds.name == "ABC", "Name does not match."
27
- ds.name = 'DEF'
28
- res = ds.save()
29
- assert res is True, f"Failed to update dataset, error: {res}"
30
  else:
31
  assert False, f"Failed to create dataset, error: {ds}"
32
 
@@ -38,8 +37,8 @@ class TestDataset(TestSdk):
38
  ds = rag.create_dataset("MA")
39
  if isinstance(ds, DataSet):
40
  assert ds.name == "MA", "Name does not match."
41
- res = ds.delete()
42
- assert res is True, f"Failed to delete dataset, error: {res}"
43
  else:
44
  assert False, f"Failed to create dataset, error: {ds}"
45
 
@@ -52,12 +51,3 @@ class TestDataset(TestSdk):
52
  assert len(list_datasets) > 0, "Do not exist any dataset"
53
  for ds in list_datasets:
54
  assert isinstance(ds, DataSet), "Existence type is not dataset."
55
-
56
- def test_get_detail_dataset_with_success(self):
57
- """
58
- Test getting a dataset's detail with success
59
- """
60
- rag = RAGFlow(API_KEY, HOST_ADDRESS)
61
- ds = rag.get_dataset(name="God")
62
- assert isinstance(ds, DataSet), f"Failed to get dataset, error: {ds}."
63
- assert ds.name == "God", "Name does not match"
 
24
  ds = rag.create_dataset("ABC")
25
  if isinstance(ds, DataSet):
26
  assert ds.name == "ABC", "Name does not match."
27
+ res = ds.update({"name":"DEF"})
28
+ assert res is None, f"Failed to update dataset, error: {res}"
 
29
  else:
30
  assert False, f"Failed to create dataset, error: {ds}"
31
 
 
37
  ds = rag.create_dataset("MA")
38
  if isinstance(ds, DataSet):
39
  assert ds.name == "MA", "Name does not match."
40
+ res = rag.delete_dataset(names=["MA"])
41
+ assert res is None, f"Failed to delete dataset, error: {res}"
42
  else:
43
  assert False, f"Failed to create dataset, error: {ds}"
44
 
 
51
  assert len(list_datasets) > 0, "Do not exist any dataset"
52
  for ds in list_datasets:
53
  assert isinstance(ds, DataSet), "Existence type is not dataset."