JobSmithManipulation
committed on
Commit
·
d78cac8
1
Parent(s):
292a582
change chunk.status to chunk.available (#2646)
Browse files
### What problem does this PR solve?
#1102
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/sdk/doc.py +2 -2
- sdk/python/ragflow/modules/chunk.py +2 -2
- sdk/python/test/t_document.py +17 -9
api/apps/sdk/doc.py
CHANGED
@@ -609,8 +609,8 @@ def set(tenant_id):
|
|
609 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
610 |
d["important_kwd"] = req["important_keywords"]
|
611 |
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
612 |
-
if "status" in req:
|
613 |
-
d["available_int"] = req["status"]
|
614 |
|
615 |
try:
|
616 |
tenant_id = DocumentService.get_tenant_id(req["document_id"])
|
|
|
609 |
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
610 |
d["important_kwd"] = req["important_keywords"]
|
611 |
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
612 |
+
if "available" in req:
|
613 |
+
d["available_int"] = req["available"]
|
614 |
|
615 |
try:
|
616 |
tenant_id = DocumentService.get_tenant_id(req["document_id"])
|
sdk/python/ragflow/modules/chunk.py
CHANGED
@@ -11,7 +11,7 @@ class Chunk(Base):
|
|
11 |
self.knowledgebase_id = None
|
12 |
self.document_name = ""
|
13 |
self.document_id = ""
|
14 |
-
self.status = 1
|
15 |
for k in list(res_dict.keys()):
|
16 |
if k not in self.__dict__:
|
17 |
res_dict.pop(k)
|
@@ -39,7 +39,7 @@ class Chunk(Base):
|
|
39 |
"content": self.content,
|
40 |
"important_keywords": self.important_keywords,
|
41 |
"document_id": self.document_id,
|
42 |
-
"status": self.status,
|
43 |
})
|
44 |
res = res.json()
|
45 |
if res.get("retmsg") == "success":
|
|
|
11 |
self.knowledgebase_id = None
|
12 |
self.document_name = ""
|
13 |
self.document_id = ""
|
14 |
+
self.available = 1
|
15 |
for k in list(res_dict.keys()):
|
16 |
if k not in self.__dict__:
|
17 |
res_dict.pop(k)
|
|
|
39 |
"content": self.content,
|
40 |
"important_keywords": self.important_keywords,
|
41 |
"document_id": self.document_id,
|
42 |
+
"available": self.available,
|
43 |
})
|
44 |
res = res.json()
|
45 |
if res.get("retmsg") == "success":
|
sdk/python/test/t_document.py
CHANGED
@@ -151,14 +151,12 @@ class TestDocument(TestSdk):
|
|
151 |
name3 = 'westworld.pdf'
|
152 |
path = 'test_data/westworld.pdf'
|
153 |
|
154 |
-
|
155 |
# Create a document in the dataset using the file path
|
156 |
rag.create_document(ds, name=name3, blob=open(path, "rb").read())
|
157 |
|
158 |
# Retrieve the document by name
|
159 |
doc = rag.get_document(name="westworld.pdf")
|
160 |
|
161 |
-
|
162 |
# Initiate asynchronous parsing
|
163 |
doc.async_parse()
|
164 |
|
@@ -231,7 +229,7 @@ class TestDocument(TestSdk):
|
|
231 |
def test_parse_document_and_chunk_list(self):
|
232 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
233 |
ds = rag.create_dataset(name="God7")
|
234 |
-
name='story.txt'
|
235 |
path = 'test_data/story.txt'
|
236 |
# name = "Test Document rag.txt"
|
237 |
# blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
|
@@ -266,11 +264,11 @@ class TestDocument(TestSdk):
|
|
266 |
assert chunk is not None, "Chunk is None"
|
267 |
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
268 |
doc = rag.get_document(name='story.txt')
|
269 |
-
chunk_count_before=doc.chunk_count
|
270 |
chunk.delete()
|
271 |
doc = rag.get_document(name='story.txt')
|
272 |
-
assert doc.chunk_count == chunk_count_before-1, "Chunk was not deleted"
|
273 |
-
|
274 |
def test_update_chunk_content(self):
|
275 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
276 |
doc = rag.get_document(name='story.txt')
|
@@ -278,9 +276,19 @@ class TestDocument(TestSdk):
|
|
278 |
assert chunk is not None, "Chunk is None"
|
279 |
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
280 |
chunk.content = "ragflow123"
|
281 |
-
res=chunk.save()
|
282 |
-
assert res is True, f"Failed to update chunk, error: {res}"
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
def test_retrieval_chunks(self):
|
285 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
286 |
ds = rag.create_dataset(name="God8")
|
|
|
151 |
name3 = 'westworld.pdf'
|
152 |
path = 'test_data/westworld.pdf'
|
153 |
|
|
|
154 |
# Create a document in the dataset using the file path
|
155 |
rag.create_document(ds, name=name3, blob=open(path, "rb").read())
|
156 |
|
157 |
# Retrieve the document by name
|
158 |
doc = rag.get_document(name="westworld.pdf")
|
159 |
|
|
|
160 |
# Initiate asynchronous parsing
|
161 |
doc.async_parse()
|
162 |
|
|
|
229 |
def test_parse_document_and_chunk_list(self):
|
230 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
231 |
ds = rag.create_dataset(name="God7")
|
232 |
+
name = 'story.txt'
|
233 |
path = 'test_data/story.txt'
|
234 |
# name = "Test Document rag.txt"
|
235 |
# blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
|
|
|
264 |
assert chunk is not None, "Chunk is None"
|
265 |
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
266 |
doc = rag.get_document(name='story.txt')
|
267 |
+
chunk_count_before = doc.chunk_count
|
268 |
chunk.delete()
|
269 |
doc = rag.get_document(name='story.txt')
|
270 |
+
assert doc.chunk_count == chunk_count_before - 1, "Chunk was not deleted"
|
271 |
+
|
272 |
def test_update_chunk_content(self):
|
273 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
274 |
doc = rag.get_document(name='story.txt')
|
|
|
276 |
assert chunk is not None, "Chunk is None"
|
277 |
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
278 |
chunk.content = "ragflow123"
|
279 |
+
res = chunk.save()
|
280 |
+
assert res is True, f"Failed to update chunk content, error: {res}"
|
281 |
+
|
282 |
+
def test_update_chunk_available(self):
|
283 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
284 |
+
doc = rag.get_document(name='story.txt')
|
285 |
+
chunk = doc.add_chunk(content="ragflow")
|
286 |
+
assert chunk is not None, "Chunk is None"
|
287 |
+
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
288 |
+
chunk.available = 0
|
289 |
+
res = chunk.save()
|
290 |
+
assert res is True, f"Failed to update chunk status, error: {res}"
|
291 |
+
|
292 |
def test_retrieval_chunks(self):
|
293 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
294 |
ds = rag.create_dataset(name="God8")
|