cleanup extraction
Browse files
- lightrag/kg/networkx_impl.py +2 -1
- lightrag/lightrag.py +2 -8
- lightrag/operate.py +12 -12
lightrag/kg/networkx_impl.py
CHANGED
|
@@ -16,13 +16,14 @@ import pipmaster as pm
|
|
| 16 |
|
| 17 |
if not pm.is_installed("networkx"):
|
| 18 |
pm.install("networkx")
|
| 19 |
-
|
| 20 |
if not pm.is_installed("graspologic"):
|
| 21 |
pm.install("graspologic")
|
| 22 |
|
| 23 |
import networkx as nx
|
| 24 |
from graspologic import embed
|
| 25 |
|
|
|
|
| 26 |
@final
|
| 27 |
@dataclass
|
| 28 |
class NetworkXStorage(BaseGraphStorage):
|
|
|
|
| 16 |
|
| 17 |
if not pm.is_installed("networkx"):
|
| 18 |
pm.install("networkx")
|
| 19 |
+
|
| 20 |
if not pm.is_installed("graspologic"):
|
| 21 |
pm.install("graspologic")
|
| 22 |
|
| 23 |
import networkx as nx
|
| 24 |
from graspologic import embed
|
| 25 |
|
| 26 |
+
|
| 27 |
@final
|
| 28 |
@dataclass
|
| 29 |
class NetworkXStorage(BaseGraphStorage):
|
lightrag/lightrag.py
CHANGED
|
@@ -184,7 +184,7 @@ class LightRAG:
|
|
| 184 |
"""Maximum number of concurrent embedding function calls."""
|
| 185 |
|
| 186 |
embedding_cache_config: dict[str, Any] = field(
|
| 187 |
-
default_factory=
|
| 188 |
"enabled": False,
|
| 189 |
"similarity_threshold": 0.95,
|
| 190 |
"use_llm_check": False,
|
|
@@ -727,7 +727,7 @@ class LightRAG:
|
|
| 727 |
|
| 728 |
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
|
| 729 |
try:
|
| 730 |
-
new_kg = await extract_entities(
|
| 731 |
chunk,
|
| 732 |
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
| 733 |
entity_vdb=self.entities_vdb,
|
|
@@ -735,12 +735,6 @@ class LightRAG:
|
|
| 735 |
llm_response_cache=self.llm_response_cache,
|
| 736 |
global_config=asdict(self),
|
| 737 |
)
|
| 738 |
-
if new_kg is None:
|
| 739 |
-
logger.info("No new entities or relationships extracted.")
|
| 740 |
-
else:
|
| 741 |
-
logger.info("New entities or relationships extracted.")
|
| 742 |
-
self.chunk_entity_relation_graph = new_kg
|
| 743 |
-
|
| 744 |
except Exception as e:
|
| 745 |
logger.error("Failed to extract entities and relationships")
|
| 746 |
raise e
|
|
|
|
| 184 |
"""Maximum number of concurrent embedding function calls."""
|
| 185 |
|
| 186 |
embedding_cache_config: dict[str, Any] = field(
|
| 187 |
+
default_factory=lambda: {
|
| 188 |
"enabled": False,
|
| 189 |
"similarity_threshold": 0.95,
|
| 190 |
"use_llm_check": False,
|
|
|
|
| 727 |
|
| 728 |
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
|
| 729 |
try:
|
| 730 |
+
await extract_entities(
|
| 731 |
chunk,
|
| 732 |
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
| 733 |
entity_vdb=self.entities_vdb,
|
|
|
|
| 735 |
llm_response_cache=self.llm_response_cache,
|
| 736 |
global_config=asdict(self),
|
| 737 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
except Exception as e:
|
| 739 |
logger.error("Failed to extract entities and relationships")
|
| 740 |
raise e
|
lightrag/operate.py
CHANGED
|
@@ -329,7 +329,7 @@ async def extract_entities(
|
|
| 329 |
relationships_vdb: BaseVectorStorage,
|
| 330 |
global_config: dict[str, str],
|
| 331 |
llm_response_cache: BaseKVStorage | None = None,
|
| 332 |
-
) ->
|
| 333 |
use_llm_func: callable = global_config["llm_model_func"]
|
| 334 |
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
|
| 335 |
enable_llm_cache_for_entity_extract: bool = global_config[
|
|
@@ -522,16 +522,18 @@ async def extract_entities(
|
|
| 522 |
]
|
| 523 |
)
|
| 524 |
|
| 525 |
-
if not
|
| 526 |
-
logger.
|
| 527 |
-
|
| 528 |
-
)
|
| 529 |
-
return None
|
| 530 |
|
| 531 |
-
if not
|
| 532 |
-
logger.
|
| 533 |
-
if not
|
| 534 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
|
| 536 |
if entity_vdb is not None:
|
| 537 |
data_for_vdb = {
|
|
@@ -560,8 +562,6 @@ async def extract_entities(
|
|
| 560 |
}
|
| 561 |
await relationships_vdb.upsert(data_for_vdb)
|
| 562 |
|
| 563 |
-
return knowledge_graph_inst
|
| 564 |
-
|
| 565 |
|
| 566 |
async def kg_query(
|
| 567 |
query: str,
|
|
|
|
| 329 |
relationships_vdb: BaseVectorStorage,
|
| 330 |
global_config: dict[str, str],
|
| 331 |
llm_response_cache: BaseKVStorage | None = None,
|
| 332 |
+
) -> None:
|
| 333 |
use_llm_func: callable = global_config["llm_model_func"]
|
| 334 |
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
|
| 335 |
enable_llm_cache_for_entity_extract: bool = global_config[
|
|
|
|
| 522 |
]
|
| 523 |
)
|
| 524 |
|
| 525 |
+
if not (all_entities_data or all_relationships_data):
|
| 526 |
+
logger.info("Didn't extract any entities and relationships.")
|
| 527 |
+
return
|
|
|
|
|
|
|
| 528 |
|
| 529 |
+
if not all_entities_data:
|
| 530 |
+
logger.info("Didn't extract any entities")
|
| 531 |
+
if not all_relationships_data:
|
| 532 |
+
logger.info("Didn't extract any relationships")
|
| 533 |
+
|
| 534 |
+
logger.info(
|
| 535 |
+
f"New entities or relationships extracted, entities:{all_entities_data}, relationships:{all_relationships_data}"
|
| 536 |
+
)
|
| 537 |
|
| 538 |
if entity_vdb is not None:
|
| 539 |
data_for_vdb = {
|
|
|
|
| 562 |
}
|
| 563 |
await relationships_vdb.upsert(data_for_vdb)
|
| 564 |
|
|
|
|
|
|
|
| 565 |
|
| 566 |
async def kg_query(
|
| 567 |
query: str,
|