cecilia-uu committed
Commit 8337858 · Parent(s): fa34f3e

API: Stop parsing (#1556)

### What problem does this PR solve?

Adds the ability to stop an in-progress document-parsing run: two `DELETE` endpoints on the dataset API, matching SDK methods, and a cooperative cancellation check in the parsing callback.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
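
For context, the change below exposes cancellation as two `DELETE` routes on the dataset API. Here is a minimal sketch of calling them directly with `requests`; the URL prefix and header shape are assumptions (the SDK's `dataset_url` and `authorization_header` hold the real values), and all IDs are placeholders:

```python
import requests

# Placeholders; the real prefix comes from the SDK's dataset_url and the
# header shape from its authorization_header.
BASE = "http://127.0.0.1:9380/api/v1/dataset"   # assumed URL prefix
HEADERS = {"Authorization": "your-api-key"}     # assumed header shape
dataset_id, document_id = "your-dataset-id", "your-document-id"

# Stop parsing a single document.
res = requests.delete(f"{BASE}/{dataset_id}/documents/{document_id}/status",
                      headers=HEADERS)
print(res.json())

# Stop parsing several documents; "doc_ids": None stops every document
# in the dataset.
res = requests.delete(f"{BASE}/{dataset_id}/documents/status",
                      headers=HEADERS, json={"doc_ids": [document_id]})
print(res.json())
```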

api/apps/dataset_api.py CHANGED

@@ -16,6 +16,7 @@ import os
 import pathlib
 import re
 import warnings
+from functools import partial
 from io import BytesIO
 
 from elasticsearch_dsl import Q
@@ -26,13 +27,12 @@ from httpx import HTTPError
 from api.contants import NAME_LENGTH_LIMIT
 from api.db import FileType, ParserType, FileSource, TaskStatus
 from api.db import StatusEnum
-from api.db.db_models import File, Task
+from api.db.db_models import File
 from api.db.services import duplicate_name
 from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.task_service import TaskService
 from api.db.services.user_service import TenantService
 from api.settings import RetCode
 from api.utils import get_uuid
@@ -233,9 +233,10 @@ def update_dataset(dataset_id):
         if chunk_num == 0:
             dataset_updating_data["embd_id"] = req["embedding_model_id"]
         else:
-            return construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document in this "
-                                                                          "dataset, so you cannot change the embedding "
-                                                                          "model.")
+            return construct_json_result(code=RetCode.DATA_ERROR,
+                                         message="You have already parsed the document in this "
+                                                 "dataset, so you cannot change the embedding "
+                                                 "model.")
     # only if chunk_num is 0, the user can update the chunk_method
     if "chunk_method" in req:
         type_value = req["chunk_method"]
@@ -614,35 +615,39 @@ def download_document(dataset_id, document_id):
 
 # ----------------------------start parsing a document-----------------------------------------------------
 # helper method for parsing
-def dummy(prog=None, msg=""):
-    pass
+# callback method
+def doc_parse_callback(doc_id, prog=None, msg=""):
+    cancel = DocumentService.do_cancel(doc_id)
+    if cancel:
+        raise Exception("The parsing process has been cancelled!")
 
 
-def doc_parse(binary, doc_name, parser_name, tenant_id):
+def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
     match parser_name:
         case "book":
-            book.chunk(doc_name, binary=binary, callback=dummy)
+            book.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "laws":
-            laws.chunk(doc_name, binary=binary, callback=dummy)
+            laws.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "manual":
-            manual.chunk(doc_name, binary=binary, callback=dummy)
+            manual.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "naive":
             # It's the mode by default, which is general in the front-end
-            naive.chunk(doc_name, binary=binary, callback=dummy)
+            naive.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "one":
-            one.chunk(doc_name, binary=binary, callback=dummy)
+            one.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "paper":
-            paper.chunk(doc_name, binary=binary, callback=dummy)
+            paper.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "picture":
-            picture.chunk(doc_name, binary=binary, tenant_id=tenant_id, lang="Chinese", callback=dummy)
+            picture.chunk(doc_name, binary=binary, tenant_id=tenant_id, lang="Chinese",
+                          callback=partial(doc_parse_callback, doc_id))
         case "presentation":
-            presentation.chunk(doc_name, binary=binary, callback=dummy)
+            presentation.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "qa":
-            qa.chunk(doc_name, binary=binary, callback=dummy)
+            qa.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "resume":
-            resume.chunk(doc_name, binary=binary, callback=dummy)
+            resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case "table":
-            table.chunk(doc_name, binary=binary, callback=dummy)
+            table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
         case _:
             return False
@@ -658,13 +663,8 @@ def parse_document(dataset_id, document_id):
         if not exist:
             return construct_json_result(code=RetCode.DATA_ERROR,
                                          message=f"This dataset '{dataset_id}' cannot be found!")
-        message = ""
-        res = get_message_during_parsing_document(document_id, message)
-        if isinstance(res, str):
-            message += res
-            return construct_json_result(code=RetCode.SUCCESS, message=message)
-        else:
-            return res
+
+        return parsing_document_internal(document_id)
 
     except Exception as e:
         return construct_error_response(e)
@@ -680,34 +680,31 @@ def parse_documents(dataset_id):
         if not exist:
             return construct_json_result(code=RetCode.DATA_ERROR,
                                          message=f"This dataset '{dataset_id}' cannot be found!")
-
-        def process(doc_ids):
-            message = ""
-            # for loop
-            for id in doc_ids:
-                res = get_message_during_parsing_document(id, message)
-                if isinstance(res, str):
-                    message += res
-                else:
-                    return res
-            return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
-
         # two conditions
-        if doc_ids:
-            return process(doc_ids)
-        else:
+        if not doc_ids:
             # documents inside the dataset
             docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
                                                                     True, "")
             doc_ids = [doc["id"] for doc in docs]
-            return process(doc_ids)
+
+        message = ""
+        # for loop
+        for id in doc_ids:
+            res = parsing_document_internal(id)
+            res_body = res.json
+            if res_body["code"] == RetCode.SUCCESS:
+                message += res_body["message"]
+            else:
+                return res
+        return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
 
     except Exception as e:
         return construct_error_response(e)
 
 
-# helper method for getting message or response when parsing the document
-def get_message_during_parsing_document(id, message):
+# helper method for parsing the document
+def parsing_document_internal(id):
+    message = ""
     try:
         # Check whether there is this document
         exist, document = DocumentService.get_by_id(id)
@@ -736,7 +733,7 @@ def get_message_during_parsing_document(id, message):
         binary = MINIO.get(bucket, doc_name)
         parser_name = doc_attributes["parser_id"]
         if binary:
-            res = doc_parse(binary, doc_name, parser_name, tenant_id)
+            res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)
             if res is False:
                 message += f"The parser id: {parser_name} of the document {doc_id} is not supported; "
         else:
@@ -744,10 +741,94 @@ def get_message_during_parsing_document(id, message):
         # failed in parsing
         if doc_attributes["status"] == TaskStatus.FAIL.value:
             message += f"Failed in parsing the document: {doc_id}; "
-        return message
+        return construct_json_result(code=RetCode.SUCCESS, message=message)
+    except Exception as e:
+        return construct_error_response(e)
+
+
+# ----------------------------stop parsing a doc-----------------------------------------------------
+@manager.route("<dataset_id>/documents/<document_id>/status", methods=["DELETE"])
+@login_required
+def stop_parsing_document(dataset_id, document_id):
+    try:
+        # valid dataset
+        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR,
+                                         message=f"This dataset '{dataset_id}' cannot be found!")
+
+        return stop_parsing_document_internal(document_id)
+
+    except Exception as e:
+        return construct_error_response(e)
+
+
+# ----------------------------stop parsing docs-----------------------------------------------------
+@manager.route("<dataset_id>/documents/status", methods=["DELETE"])
+@login_required
+def stop_parsing_documents(dataset_id):
+    doc_ids = request.json["doc_ids"]
+    try:
+        # valid dataset?
+        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR,
+                                         message=f"This dataset '{dataset_id}' cannot be found!")
+        if not doc_ids:
+            # documents inside the dataset
+            docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
+                                                                    True, "")
+            doc_ids = [doc["id"] for doc in docs]
+
+        message = ""
+        # for loop
+        for id in doc_ids:
+            res = stop_parsing_document_internal(id)
+            res_body = res.json
+            if res_body["code"] == RetCode.SUCCESS:
+                message += res_body["message"]
+            else:
+                return res
+        return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
+
+    except Exception as e:
+        return construct_error_response(e)
+
+
+# Helper method
+def stop_parsing_document_internal(document_id):
+    try:
+        # valid doc?
+        exist, doc = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(message=f"This document '{document_id}' cannot be found!",
+                                         code=RetCode.ARGUMENT_ERROR)
+        doc_attributes = doc.to_dict()
+
+        # only when the status is parsing, we need to stop it
+        if doc_attributes["status"] == TaskStatus.RUNNING.value:
+            tenant_id = DocumentService.get_tenant_id(document_id)
+            if not tenant_id:
+                return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)
+
+            # update successfully?
+            if not DocumentService.update_by_id(document_id, {"status": "2"}):  # cancel
+                return construct_json_result(
+                    code=RetCode.OPERATING_ERROR,
+                    message="There was an error during the stopping parsing the document process. "
+                            "Please check the status of the RAGFlow server and try the update again."
+                )
+
+            _, doc_attributes = DocumentService.get_by_id(document_id)
+            doc_attributes = doc_attributes.to_dict()
+
+            # failed in stop parsing
+            if doc_attributes["status"] == TaskStatus.RUNNING.value:
+                return construct_json_result(message=f"Failed in parsing the document: {document_id}; ", code=RetCode.SUCCESS)
+        return construct_json_result(code=RetCode.SUCCESS, message="")
     except Exception as e:
         return construct_error_response(e)
-# ----------------------------stop parsing-----------------------------------------------------
+
 
 # ----------------------------show the status of the file-----------------------------------------------------
 @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
@@ -774,6 +855,7 @@ def show_parsing_status(dataset_id, document_id):
         )
     except Exception as e:
         return construct_error_response(e)
+
 # ----------------------------list the chunks of the file-----------------------------------------------------
 
 # -- --------------------------delete the chunk-----------------------------------------------------
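
Cancellation in `doc_parse` is cooperative: every chunker receives `partial(doc_parse_callback, doc_id)` as its progress callback, and the callback raises as soon as `DocumentService.do_cancel` reports the document as cancelled, which unwinds the chunking call. Below is a self-contained sketch of that pattern; `do_cancel` and `chunk` here are illustrative stand-ins, not the PR's real implementations:

```python
from functools import partial

CANCELLED = {"doc-1"}  # stand-in for the cancel status stored in the DB

def do_cancel(doc_id):
    # The real check lives in DocumentService.do_cancel and reads the DB.
    return doc_id in CANCELLED

def doc_parse_callback(doc_id, prog=None, msg=""):
    if do_cancel(doc_id):
        raise Exception("The parsing process has been cancelled!")

def chunk(doc_name, callback):
    # A chunker reports progress between stages; raising in the callback
    # aborts parsing mid-flight.
    for stage in range(5):
        callback(prog=stage / 5, msg=f"{doc_name}: stage {stage}")

try:
    chunk("demo.txt", callback=partial(doc_parse_callback, "doc-1"))
except Exception as e:
    print(e)  # The parsing process has been cancelled!
```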
api/db/services/document_service.py CHANGED

@@ -333,6 +333,17 @@ class DocumentService(CommonService):
                                  cls.model.kb_id == kb_id).dicts())
 
 
+    @classmethod
+    @DB.connection_context()
+    def do_cancel(cls, doc_id):
+        try:
+            _, doc = DocumentService.get_by_id(doc_id)
+            return doc.run == TaskStatus.CANCEL.value or doc.progress < 0
+        except Exception as e:
+            pass
+        return False
+
+
 def queue_raptor_tasks(doc):
     def new_task():
         nonlocal doc
@@ -347,4 +358,4 @@ def queue_raptor_tasks(doc):
     task = new_task()
     bulk_insert_into_db(Task, [task], True)
     task["type"] = "raptor"
-    assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status."
+    assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status."
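
`do_cancel` counts a document as cancelled when its `run` status equals `TaskStatus.CANCEL` (`"2"`, the same value `stop_parsing_document_internal` writes) or when its progress has gone negative. Here is the same predicate in isolation, with an illustrative `Doc` stand-in for the model row:

```python
from dataclasses import dataclass

CANCEL = "2"  # TaskStatus.CANCEL.value

@dataclass
class Doc:  # illustrative stand-in for the peewee document row
    run: str
    progress: float

def do_cancel(doc):
    # Mirrors the new classmethod: cancelled status or negative progress.
    return doc.run == CANCEL or doc.progress < 0

print(do_cancel(Doc(run="2", progress=0.5)))   # True: marked cancelled
print(do_cancel(Doc(run="1", progress=-1.0)))  # True: negative progress
print(do_cancel(Doc(run="1", progress=0.5)))   # False: still running
```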
docs/references/ragflow_api.md CHANGED

@@ -758,7 +758,7 @@ This method enables a specific document to start parsing for a specific user.
 ```json
 {
     "code": 102,
-    "message": "This dataset 'imagination.txt' cannot be found!"
+    "message": "This dataset 'imagination' cannot be found!"
 }
 ```
 
sdk/python/ragflow/ragflow.py CHANGED

@@ -157,6 +157,17 @@ class RAGFlow:
         return res.json()
 
     # ----------------------------stop parsing-----------------------------------------------------
+    def stop_parsing_document(self, dataset_id, document_id):
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status"
+        res = requests.delete(endpoint, headers=self.authorization_header)
+
+        return res.json()
+
+    def stop_parsing_documents(self, dataset_id, doc_ids=None):
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/status"
+        res = requests.delete(endpoint, headers=self.authorization_header, json={"doc_ids": doc_ids})
+
+        return res.json()
 
     # ----------------------------show the status of the file-----------------------------------------------------
     def show_parsing_status(self, dataset_id, document_id):
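
Typical use of the two new SDK methods, assuming a dataset and uploaded documents already exist (credentials and IDs are placeholders):

```python
from ragflow import RAGFlow

ragflow = RAGFlow("your-api-key", "http://127.0.0.1:9380")
dataset_id = "your-dataset-id"
doc_id = "your-document-id"

# Stop a single document that is currently parsing.
print(ragflow.stop_parsing_document(dataset_id, doc_id))

# Stop a specific set of documents...
print(ragflow.stop_parsing_documents(dataset_id, [doc_id]))

# ...or omit doc_ids to stop every document in the dataset.
print(ragflow.stop_parsing_documents(dataset_id))
```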
sdk/python/test/test_document.py CHANGED

@@ -949,7 +949,126 @@ class TestFile(TestSdk):
         # parse
         res = ragflow.start_parsing_documents(created_res_id, doc_ids)
         assert res["code"] == RetCode.SUCCESS and res["message"] == ""
+
     # ----------------------------stop parsing-----------------------------------------------------
+    def test_stop_parsing_document_with_success(self):
+        """
+        Test the stopping parsing of a document with success.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_start_parsing_document_with_success")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/lol.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # parse file
+        res = ragflow.start_parsing_document(created_res_id, doc_id)
+        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
+        res = ragflow.stop_parsing_document(created_res_id, doc_id)
+        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
+
+    def test_stop_parsing_nonexistent_document(self):
+        """
+        Test the stopping parsing a document which does not exist.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_start_parsing_nonexistent_document")
+        created_res_id = created_res["data"]["dataset_id"]
+        res = ragflow.stop_parsing_document(created_res_id, "imagination.txt")
+        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document 'imagination.txt' cannot be found!"
+
+    def test_stop_parsing_document_in_nonexistent_dataset(self):
+        """
+        Test the stopping parsing a document whose dataset is nonexistent.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_download_nonexistent_document")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"][0]
+        doc_id = data["id"]
+        # parse
+        res = ragflow.stop_parsing_document("imagination", doc_id)
+        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
+
+    # ------------------------stop parsing multiple documents----------------------------
+    def test_stop_parsing_documents_in_nonexistent_dataset(self):
+        """
+        Test the stopping parsing documents whose dataset is nonexistent.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_download_nonexistent_document")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # parse
+        res = ragflow.stop_parsing_documents("imagination")
+        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
+
+    def test_stop_parsing_multiple_documents(self):
+        """
+        Test the stopping parsing documents with a success.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset("test_start_parsing_multiple_documents")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        res = ragflow.start_parsing_documents(created_res_id)
+        assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
+
+        res = ragflow.stop_parsing_documents(created_res_id)
+        assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
+
+    def test_stop_parsing_multiple_documents_with_one_empty_file(self):
+        """
+        Test the stopping parsing documents, one of which is empty.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
+        ragflow.upload_local_file(created_res_id, file_paths)
+        res = ragflow.start_parsing_documents(created_res_id)
+        assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
+        res = ragflow.stop_parsing_documents(created_res_id)
+        assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
+
+    def test_stop_parsing_multiple_specific_documents(self):
+        """
+        Test the stopping parsing documents whose document ids are specified.
+        """
+        # create a dataset
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
+        created_res_id = created_res["data"]["dataset_id"]
+        # upload files
+        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+        # get the doc_id
+        data = uploading_res["data"]
+        doc_ids = []
+        for d in data:
+            doc_ids.append(d["id"])
+        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
+        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
+        res = ragflow.stop_parsing_documents(created_res_id, doc_ids)
+        assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
 
     # ----------------------------show the status of the file-----------------------------------------------------
     def test_show_status_with_success(self):