Spaces:

rm-lht
/

lightrag

Configuration error

tackhwa committed on Apr 22

Commit

db3c974

1 Parent(s): cfb0884

remove regex

Files changed (2) hide show

lightrag/operate.py CHANGED Viewed

@@ -18,7 +18,6 @@ from .utils import (
     normalize_extracted_info,
     pack_user_ass_to_openai_messages,
     split_string_by_multi_markers,
-    extract_fixed_parenthesized_content,
     truncate_list_by_token_size,
     process_combine_contexts,
     compute_args_hash,
@@ -153,7 +152,7 @@ async def _handle_single_entity_extraction(
     chunk_key: str,
     file_path: str = "unknown_source",
 ):
-    if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
         return None
     # Clean and validate entity name
@@ -199,7 +198,7 @@ async def _handle_single_relationship_extraction(
     chunk_key: str,
     file_path: str = "unknown_source",
 ):
-    if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
         return None
     # add this record as edge
     source = clean_str(record_attributes[1])
@@ -550,8 +549,6 @@ async def extract_entities(
             [context_base["record_delimiter"], context_base["completion_delimiter"]],
         )
-        records = extract_fixed_parenthesized_content(records)
         for record in records:
             record = re.search(r"\((.*)\)", record)
             if record is None:

     normalize_extracted_info,
     pack_user_ass_to_openai_messages,
     split_string_by_multi_markers,
     truncate_list_by_token_size,
     process_combine_contexts,
     compute_args_hash,
     chunk_key: str,
     file_path: str = "unknown_source",
 ):
+    if len(record_attributes) < 4 or '"entity"' not in record_attributes[0]:
         return None
     # Clean and validate entity name
     chunk_key: str,
     file_path: str = "unknown_source",
 ):
+    if len(record_attributes) < 5 or '"relationship"' not in record_attributes[0]:
         return None
     # add this record as edge
     source = clean_str(record_attributes[1])
             [context_base["record_delimiter"], context_base["completion_delimiter"]],
         )
         for record in records:
             record = re.search(r"\((.*)\)", record)
             if record is None:

lightrag/utils.py CHANGED Viewed

@@ -408,33 +408,6 @@ def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]
     return [r.strip() for r in results if r.strip()]
-def extract_fixed_parenthesized_content(records: list[str]) -> list[str]:
-    """
-    Extract content that should be in parentheses from each record.
-    Ensures each extracted item has both opening and closing parentheses.
-    """
-    result = []
-    for record in records:
-        # First, extract properly matched pairs
-        balanced_matches = re.findall(r"\((.*?)\)", record)
-        for match in balanced_matches:
-            result.append(f"({match})")
-        # Process string to handle unbalanced parentheses
-        # For opening without closing
-        open_matches = re.findall(r"\(([^()]*?)$", record)
-        for match in open_matches:
-            result.append(f"({match})")
-        # For closing without opening
-        close_matches = re.findall(r"^([^()]*?)\)", record)
-        for match in close_matches:
-            result.append(f"({match})")
-    return result
 # Refer the utils functions of the official GraphRAG implementation:
 # https://github.com/microsoft/graphrag
 def clean_str(input: Any) -> str:

     return [r.strip() for r in results if r.strip()]
 # Refer the utils functions of the official GraphRAG implementation:
 # https://github.com/microsoft/graphrag
 def clean_str(input: Any) -> str: