yangdx
committed on
Commit
·
d0b1ac0
1
Parent(s):
72ece4e
Add max nodes limit for graph retrieval of networkX
Browse files
• Set MAX_GRAPH_NODES env var (default 1000)
• Change edge type to "RELATED"
- .env.example +1 -0
- lightrag/api/routers/graph_routes.py +17 -2
- lightrag/kg/networkx_impl.py +8 -6
.env.example
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
# PORT=9621
|
| 4 |
# WORKERS=1
|
| 5 |
# NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances
|
|
|
|
| 6 |
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
| 7 |
|
| 8 |
### Optional SSL Configuration
|
|
|
|
| 3 |
# PORT=9621
|
| 4 |
# WORKERS=1
|
| 5 |
# NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances
|
| 6 |
+
# MAX_GRAPH_NODES=1000 # Max nodes returned from graph retrieval
|
| 7 |
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
| 8 |
|
| 9 |
### Optional SSL Configuration
|
lightrag/api/routers/graph_routes.py
CHANGED
|
@@ -16,12 +16,27 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
|
|
| 16 |
|
| 17 |
@router.get("/graph/label/list", dependencies=[Depends(optional_api_key)])
|
| 18 |
async def get_graph_labels():
|
| 19 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
return await rag.get_graph_labels()
|
| 21 |
|
| 22 |
@router.get("/graphs", dependencies=[Depends(optional_api_key)])
|
| 23 |
async def get_knowledge_graph(label: str, max_depth: int = 3):
|
| 24 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
|
| 26 |
|
| 27 |
return router
|
|
|
|
| 16 |
|
| 17 |
@router.get("/graph/label/list", dependencies=[Depends(optional_api_key)])
|
| 18 |
async def get_graph_labels():
|
| 19 |
+
"""
|
| 20 |
+
Get all graph labels
|
| 21 |
+
|
| 22 |
+
Returns:
|
| 23 |
+
List[str]: List of graph labels
|
| 24 |
+
"""
|
| 25 |
return await rag.get_graph_labels()
|
| 26 |
|
| 27 |
@router.get("/graphs", dependencies=[Depends(optional_api_key)])
|
| 28 |
async def get_knowledge_graph(label: str, max_depth: int = 3):
|
| 29 |
+
"""
|
| 30 |
+
Get knowledge graph for a specific label.
|
| 31 |
+
Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
|
| 32 |
+
|
| 33 |
+
Args:
|
| 34 |
+
label (str): Label to get knowledge graph for
|
| 35 |
+
max_depth (int, optional): Maximum depth of graph. Defaults to 3.
|
| 36 |
+
|
| 37 |
+
Returns:
|
| 38 |
+
Dict[str, List[str]]: Knowledge graph for label
|
| 39 |
+
"""
|
| 40 |
return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
|
| 41 |
|
| 42 |
return router
|
lightrag/kg/networkx_impl.py
CHANGED
|
@@ -24,6 +24,8 @@ from .shared_storage import (
|
|
| 24 |
is_multiprocess,
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
| 27 |
|
| 28 |
@final
|
| 29 |
@dataclass
|
|
@@ -234,6 +236,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
| 234 |
) -> KnowledgeGraph:
|
| 235 |
"""
|
| 236 |
Get complete connected subgraph for specified node (including the starting node itself)
|
|
|
|
| 237 |
|
| 238 |
Args:
|
| 239 |
node_label: Label of the starting node
|
|
@@ -269,18 +272,17 @@ class NetworkXStorage(BaseGraphStorage):
|
|
| 269 |
subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth)
|
| 270 |
|
| 271 |
# Check if number of nodes exceeds max_graph_nodes
|
| 272 |
-
|
| 273 |
-
if len(subgraph.nodes()) > max_graph_nodes:
|
| 274 |
origin_nodes = len(subgraph.nodes())
|
| 275 |
node_degrees = dict(subgraph.degree())
|
| 276 |
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
|
| 277 |
-
:
|
| 278 |
]
|
| 279 |
top_node_ids = [node[0] for node in top_nodes]
|
| 280 |
-
# Create new subgraph
|
| 281 |
subgraph = subgraph.subgraph(top_node_ids)
|
| 282 |
logger.info(
|
| 283 |
-
f"Reduced graph from {origin_nodes} nodes to {
|
| 284 |
)
|
| 285 |
|
| 286 |
# Add nodes to result
|
|
@@ -320,7 +322,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
| 320 |
result.edges.append(
|
| 321 |
KnowledgeGraphEdge(
|
| 322 |
id=edge_id,
|
| 323 |
-
type="
|
| 324 |
source=str(source),
|
| 325 |
target=str(target),
|
| 326 |
properties=edge_data,
|
|
|
|
| 24 |
is_multiprocess,
|
| 25 |
)
|
| 26 |
|
| 27 |
+
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
| 28 |
+
|
| 29 |
|
| 30 |
@final
|
| 31 |
@dataclass
|
|
|
|
| 236 |
) -> KnowledgeGraph:
|
| 237 |
"""
|
| 238 |
Get complete connected subgraph for specified node (including the starting node itself)
|
| 239 |
+
Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
|
| 240 |
|
| 241 |
Args:
|
| 242 |
node_label: Label of the starting node
|
|
|
|
| 272 |
subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth)
|
| 273 |
|
| 274 |
# Check if number of nodes exceeds max_graph_nodes
|
| 275 |
+
if len(subgraph.nodes()) > MAX_GRAPH_NODES:
|
|
|
|
| 276 |
origin_nodes = len(subgraph.nodes())
|
| 277 |
node_degrees = dict(subgraph.degree())
|
| 278 |
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
|
| 279 |
+
:MAX_GRAPH_NODES
|
| 280 |
]
|
| 281 |
top_node_ids = [node[0] for node in top_nodes]
|
| 282 |
+
# Create new subgraph and keep nodes only with most degree
|
| 283 |
subgraph = subgraph.subgraph(top_node_ids)
|
| 284 |
logger.info(
|
| 285 |
+
f"Reduced graph from {origin_nodes} nodes to {MAX_GRAPH_NODES} nodes (depth={max_depth})"
|
| 286 |
)
|
| 287 |
|
| 288 |
# Add nodes to result
|
|
|
|
| 322 |
result.edges.append(
|
| 323 |
KnowledgeGraphEdge(
|
| 324 |
id=edge_id,
|
| 325 |
+
type="RELATED",
|
| 326 |
source=str(source),
|
| 327 |
target=str(target),
|
| 328 |
properties=edge_data,
|