LiuHua root committed on
Commit
7889c99
·
1 Parent(s): 788c296

create dataset (#2074)

Browse files

### What problem does this PR solve?

You can now use the Python SDK to create a dataset.

### Type of change

- [x] New Feature

---------

Co-authored-by: root <root@xwg>

sdk/python/ragflow/modules/__init__.py ADDED
File without changes
sdk/python/ragflow/modules/base.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Base(object):
    """Attribute-style wrapper around a response dictionary.

    Every key of the dictionary becomes an instance attribute; nested
    dictionaries are wrapped recursively in ``Base`` so they can be reached
    with dotted access. The ``rag`` client is kept on the instance and is used
    by :meth:`post` and :meth:`get`.
    """

    def __init__(self, rag, res_dict):
        """Populate the instance from ``res_dict``.

        Args:
            rag: Client object exposing ``post(path, param)`` and
                ``get(path, params)``.
            res_dict: Mapping of attribute names to values. ``None`` is
                treated as an empty mapping.
        """
        self.rag = rag
        # A missing payload is treated as "no attributes" instead of crashing.
        for key, value in (res_dict or {}).items():
            # Written through __dict__ so keys overwrite defaults set by subclasses.
            self.__dict__[key] = Base(rag, value) if isinstance(value, dict) else value

    def to_json(self):
        """Return the data attributes as a plain dictionary.

        Dunder names, callables and the ``rag`` client are skipped; nested
        ``Base`` values are serialized recursively.
        """
        result = {}
        for name in dir(self):
            # Filter on the name first so dunder attributes are never evaluated.
            if name.startswith('__') or name == "rag":
                continue
            value = getattr(self, name)
            if callable(value):
                continue
            result[name] = value.to_json() if isinstance(value, Base) else value
        return result

    def post(self, path, param):
        """Forward a POST request to the ``rag`` client and return its result."""
        return self.rag.post(path, param)

    def get(self, path, params=''):
        """Forward a GET request to the ``rag`` client and return its result."""
        return self.rag.get(path, params)
29
+
30
+
sdk/python/ragflow/modules/dataset.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class DataSet(Base):
    """Client-side view of a dataset, populated from a response dictionary."""

    class ParseConfig(Base):
        """Parser settings of a dataset; defaults apply to keys the payload omits."""

        def __init__(self, rag, res_dict):
            # Defaults are assigned first so values in res_dict override them.
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        """Build the dataset from ``res_dict``.

        Args:
            rag: Client object used by :meth:`delete` through ``Base.post``.
            res_dict: Mapping of dataset fields; keys override the defaults
                assigned below.
        """
        # Defaults are assigned first so values in res_dict override them.
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.parse_method = 0
        self.parser_config = None
        super().__init__(rag, res_dict)
        # Base wraps nested dicts in a plain Base; re-wrap the parser settings
        # in ParseConfig so its defaults are actually applied.
        raw_config = (res_dict or {}).get("parser_config")
        if isinstance(raw_config, dict):
            self.parser_config = DataSet.ParseConfig(rag, raw_config)

    def delete(self):
        """Delete this dataset on the server.

        Returns:
            True when the request completed and the response does not report
            an HTTP error; False when the request raised or the response is
            not OK.
        """
        try:
            res = self.post("/rm", {"kb_id": self.id})
        except Exception:
            # Best effort by design: callers get a boolean, never an exception.
            return False
        # NOTE(review): only the HTTP status is checked here; if the server
        # reports failures inside a 200 response body, that check still needs
        # to be added -- confirm against the API.
        return bool(getattr(res, "ok", True))
sdk/python/ragflow/ragflow.py CHANGED
@@ -12,35 +12,56 @@
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
- import json
16
- import os
17
 
18
  import requests
19
 
20
- from api.db.services.document_service import DocumentService
21
- from api.settings import RetCode
22
 
23
 
24
  class RAGFlow:
25
  def __init__(self, user_key, base_url, version='v1'):
26
  """
27
- api_url: http://<host_address>/api/v1
28
- dataset_url: http://<host_address>/api/v1/dataset
29
- document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
30
  """
31
  self.user_key = user_key
32
- self.api_url = f"{base_url}/api/{version}"
33
- self.dataset_url = f"{self.api_url}/dataset"
34
  self.authorization_header = {"Authorization": "{}".format(self.user_key)}
 
 
 
 
 
 
 
 
 
35
 
36
  def create_dataset(self, dataset_name):
37
  """
38
  name: dataset name
39
  """
40
- res = requests.post(url=self.dataset_url, json={"name": dataset_name}, headers=self.authorization_header)
41
- result_dict = json.loads(res.text)
42
- return result_dict
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def delete_dataset(self, dataset_name):
45
  dataset_id = self.find_dataset_id_by_name(dataset_name)
46
 
@@ -55,16 +76,6 @@ class RAGFlow:
55
  return dataset["id"]
56
  return None
57
 
58
- def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True):
59
- params = {
60
- "offset": offset,
61
- "count": count,
62
- "orderby": orderby,
63
- "desc": desc
64
- }
65
- response = requests.get(url=self.dataset_url, params=params, headers=self.authorization_header)
66
- return response.json()
67
-
68
  def get_dataset(self, dataset_name):
69
  dataset_id = self.find_dataset_id_by_name(dataset_name)
70
  endpoint = f"{self.dataset_url}/{dataset_id}"
@@ -78,7 +89,7 @@ class RAGFlow:
78
  response = requests.put(endpoint, json=params, headers=self.authorization_header)
79
  return response.json()
80
 
81
- # ------------------------------- CONTENT MANAGEMENT -----------------------------------------------------
82
 
83
  # ----------------------------upload local files-----------------------------------------------------
84
  def upload_local_file(self, dataset_id, file_paths):
@@ -186,4 +197,4 @@ class RAGFlow:
186
  # ----------------------------get a specific chunk-----------------------------------------------------
187
 
188
  # ----------------------------retrieval test-----------------------------------------------------
189
-
 
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
 
 
15
 
16
  import requests
17
 
18
+ from .modules.dataset import DataSet
 
19
 
20
 
21
  class RAGFlow:
22
  def __init__(self, user_key, base_url, version='v1'):
23
  """
24
+ api_url: http://<host_address>/v1
25
+ dataset_url: http://<host_address>/v1/kb
26
+ document_url: http://<host_address>/v1/dataset/{dataset_id}/documents
27
  """
28
  self.user_key = user_key
29
+ self.api_url = f"{base_url}/{version}"
30
+ self.dataset_url = f"{self.api_url}/kb"
31
  self.authorization_header = {"Authorization": "{}".format(self.user_key)}
32
+ self.base_url = base_url
33
+
34
+ def post(self, path, param):
35
+ res = requests.post(url=self.dataset_url + path, json=param, headers=self.authorization_header)
36
+ return res
37
+
38
+ def get(self, path, params=''):
39
+ res = requests.get(self.dataset_url + path, params=params, headers=self.authorization_header)
40
+ return res
41
 
42
  def create_dataset(self, dataset_name):
43
  """
44
  name: dataset name
45
  """
46
+ res_create = self.post("/create", {"name": dataset_name})
47
+ res_create_data = res_create.json()['data']
48
+ res_detail = self.get("/detail", {"kb_id": res_create_data["kb_id"]})
49
+ res_detail_data = res_detail.json()['data']
50
+ result = {}
51
+ result['id'] = res_detail_data['id']
52
+ result['name'] = res_detail_data['name']
53
+ result['avatar'] = res_detail_data['avatar']
54
+ result['description'] = res_detail_data['description']
55
+ result['language'] = res_detail_data['language']
56
+ result['embedding_model'] = res_detail_data['embd_id']
57
+ result['permission'] = res_detail_data['permission']
58
+ result['document_count'] = res_detail_data['doc_num']
59
+ result['chunk_count'] = res_detail_data['chunk_num']
60
+ result['parser_config'] = res_detail_data['parser_config']
61
+ dataset = DataSet(self, result)
62
+ return dataset
63
+
64
+ """
65
  def delete_dataset(self, dataset_name):
66
  dataset_id = self.find_dataset_id_by_name(dataset_name)
67
 
 
76
  return dataset["id"]
77
  return None
78
 
 
 
 
 
 
 
 
 
 
 
79
  def get_dataset(self, dataset_name):
80
  dataset_id = self.find_dataset_id_by_name(dataset_name)
81
  endpoint = f"{self.dataset_url}/{dataset_id}"
 
89
  response = requests.put(endpoint, json=params, headers=self.authorization_header)
90
  return response.json()
91
 
92
+ # ------------------------------- CONTENT MANAGEMENT -----------------------------------------------------
93
 
94
  # ----------------------------upload local files-----------------------------------------------------
95
  def upload_local_file(self, dataset_id, file_paths):
 
197
  # ----------------------------get a specific chunk-----------------------------------------------------
198
 
199
  # ----------------------------retrieval test-----------------------------------------------------
200
+ """
sdk/python/setup.py CHANGED
@@ -15,5 +15,7 @@
15
  import setuptools
16
 
17
  if __name__ == "__main__":
18
- setuptools.setup(packages=['ragflow'])
 
 
19
 
 
15
  import setuptools
16
 
17
  if __name__ == "__main__":
18
+ setuptools.setup(name='ragflow',
19
+ version="0.1",
20
+ packages=setuptools.find_packages())
21
 
sdk/python/test/common.py CHANGED
@@ -1,4 +1,4 @@
1
 
2
 
3
- API_KEY = 'IjJkOGQ4ZDE2MzkyMjExZWZhYTk0MzA0M2Q3ZWU1MzdlIg.ZoUfug.RmqcYyCrlAnLtkzk6bYXiXN3eEY'
4
  HOST_ADDRESS = 'http://127.0.0.1:9380'
 
1
 
2
 
3
import os

# Connection settings for the SDK tests. Both values can be overridden through
# environment variables; the defaults are the values previously hard-coded here.
# NOTE(review): the default API key is a credential committed to the repository;
# prefer supplying RAGFLOW_API_KEY and rotating this key.
API_KEY = os.environ.get('RAGFLOW_API_KEY', 'IjUxNGM0MmM4NWY5MzExZWY5MDhhMDI0MmFjMTIwMDA2Ig.ZsWebA.mV1NKdSPPllgowiH-7vz36tMWyI')
HOST_ADDRESS = os.environ.get('RAGFLOW_HOST_ADDRESS', 'http://127.0.0.1:9380')
sdk/python/test/t_dataset.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ragflow import RAGFlow
2
+
3
+ from common import API_KEY, HOST_ADDRESS
4
+ from test_sdkbase import TestSdk
5
+
6
+
7
class TestDataset(TestSdk):
    """Integration tests for creating and deleting datasets through the SDK."""

    def test_create_dataset_with_success(self):
        """Create a dataset and check that the returned object carries its name."""
        rag = RAGFlow(API_KEY, HOST_ADDRESS)
        ds = rag.create_dataset("God")
        try:
            assert ds is not None, "The dataset creation failed, returned None."
            assert ds.name == "God", "Dataset name does not match."
        finally:
            # Clean up so repeated runs do not leave datasets behind on the server.
            if ds is not None:
                ds.delete()

    def test_delete_one_file(self):
        """
        Test creating a dataset and then deleting it with success.
        """
        rag = RAGFlow(API_KEY, HOST_ADDRESS)
        ds = rag.create_dataset("ABC")
        assert ds is not None, "Failed to create dataset"
        assert ds.name == "ABC", "Dataset name mismatch"
        delete_result = ds.delete()
        assert delete_result is True, "Failed to delete dataset"