JobSmithManipulation committed on
Commit
d78cac8
·
1 Parent(s): 292a582

change chunk.status to chunk.available (#2646)

Browse files

### What problem does this PR solve?

#1102

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

api/apps/sdk/doc.py CHANGED
@@ -609,8 +609,8 @@ def set(tenant_id):
609
  d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
610
  d["important_kwd"] = req["important_keywords"]
611
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
612
- if "available_int" in req:
613
- d["available_int"] = req["available_int"]
614
 
615
  try:
616
  tenant_id = DocumentService.get_tenant_id(req["document_id"])
 
609
  d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
610
  d["important_kwd"] = req["important_keywords"]
611
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
612
+ if "available" in req:
613
+ d["available_int"] = req["available"]
614
 
615
  try:
616
  tenant_id = DocumentService.get_tenant_id(req["document_id"])
sdk/python/ragflow/modules/chunk.py CHANGED
@@ -11,7 +11,7 @@ class Chunk(Base):
11
  self.knowledgebase_id = None
12
  self.document_name = ""
13
  self.document_id = ""
14
- self.status = "1"
15
  for k in list(res_dict.keys()):
16
  if k not in self.__dict__:
17
  res_dict.pop(k)
@@ -39,7 +39,7 @@ class Chunk(Base):
39
  "content": self.content,
40
  "important_keywords": self.important_keywords,
41
  "document_id": self.document_id,
42
- "status": self.status,
43
  })
44
  res = res.json()
45
  if res.get("retmsg") == "success":
 
11
  self.knowledgebase_id = None
12
  self.document_name = ""
13
  self.document_id = ""
14
+ self.available = 1
15
  for k in list(res_dict.keys()):
16
  if k not in self.__dict__:
17
  res_dict.pop(k)
 
39
  "content": self.content,
40
  "important_keywords": self.important_keywords,
41
  "document_id": self.document_id,
42
+ "available": self.available,
43
  })
44
  res = res.json()
45
  if res.get("retmsg") == "success":
sdk/python/test/t_document.py CHANGED
@@ -151,14 +151,12 @@ class TestDocument(TestSdk):
151
  name3 = 'westworld.pdf'
152
  path = 'test_data/westworld.pdf'
153
 
154
-
155
  # Create a document in the dataset using the file path
156
  rag.create_document(ds, name=name3, blob=open(path, "rb").read())
157
 
158
  # Retrieve the document by name
159
  doc = rag.get_document(name="westworld.pdf")
160
 
161
-
162
  # Initiate asynchronous parsing
163
  doc.async_parse()
164
 
@@ -231,7 +229,7 @@ class TestDocument(TestSdk):
231
  def test_parse_document_and_chunk_list(self):
232
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
233
  ds = rag.create_dataset(name="God7")
234
- name='story.txt'
235
  path = 'test_data/story.txt'
236
  # name = "Test Document rag.txt"
237
  # blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
@@ -266,11 +264,11 @@ class TestDocument(TestSdk):
266
  assert chunk is not None, "Chunk is None"
267
  assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
268
  doc = rag.get_document(name='story.txt')
269
- chunk_count_before=doc.chunk_count
270
  chunk.delete()
271
  doc = rag.get_document(name='story.txt')
272
- assert doc.chunk_count == chunk_count_before-1, "Chunk was not deleted"
273
-
274
  def test_update_chunk_content(self):
275
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
276
  doc = rag.get_document(name='story.txt')
@@ -278,9 +276,19 @@ class TestDocument(TestSdk):
278
  assert chunk is not None, "Chunk is None"
279
  assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
280
  chunk.content = "ragflow123"
281
- res=chunk.save()
282
- assert res is True, f"Failed to update chunk, error: {res}"
283
-
 
 
 
 
 
 
 
 
 
 
284
  def test_retrieval_chunks(self):
285
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
286
  ds = rag.create_dataset(name="God8")
 
151
  name3 = 'westworld.pdf'
152
  path = 'test_data/westworld.pdf'
153
 
 
154
  # Create a document in the dataset using the file path
155
  rag.create_document(ds, name=name3, blob=open(path, "rb").read())
156
 
157
  # Retrieve the document by name
158
  doc = rag.get_document(name="westworld.pdf")
159
 
 
160
  # Initiate asynchronous parsing
161
  doc.async_parse()
162
 
 
229
  def test_parse_document_and_chunk_list(self):
230
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
231
  ds = rag.create_dataset(name="God7")
232
+ name = 'story.txt'
233
  path = 'test_data/story.txt'
234
  # name = "Test Document rag.txt"
235
  # blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
 
264
  assert chunk is not None, "Chunk is None"
265
  assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
266
  doc = rag.get_document(name='story.txt')
267
+ chunk_count_before = doc.chunk_count
268
  chunk.delete()
269
  doc = rag.get_document(name='story.txt')
270
+ assert doc.chunk_count == chunk_count_before - 1, "Chunk was not deleted"
271
+
272
  def test_update_chunk_content(self):
273
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
274
  doc = rag.get_document(name='story.txt')
 
276
  assert chunk is not None, "Chunk is None"
277
  assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
278
  chunk.content = "ragflow123"
279
+ res = chunk.save()
280
+ assert res is True, f"Failed to update chunk content, error: {res}"
281
+
282
+ def test_update_chunk_available(self):
283
+ rag = RAGFlow(API_KEY, HOST_ADDRESS)
284
+ doc = rag.get_document(name='story.txt')
285
+ chunk = doc.add_chunk(content="ragflow")
286
+ assert chunk is not None, "Chunk is None"
287
+ assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
288
+ chunk.available = 0
289
+ res = chunk.save()
290
+ assert res is True, f"Failed to update chunk status, error: {res}"
291
+
292
  def test_retrieval_chunks(self):
293
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
294
  ds = rag.create_dataset(name="God8")