yangdx
committed on
Commit
·
a55a783
1
Parent(s):
bbf070e
Fix linting
Browse files- lightrag/kg/networkx_impl.py +24 -20
lightrag/kg/networkx_impl.py
CHANGED
@@ -184,7 +184,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
184 |
# else:
|
185 |
# labels.add(node_data["entity_type"])
|
186 |
labels.add(str(node)) # Add node id as a label
|
187 |
-
|
188 |
# Return sorted list
|
189 |
return sorted(list(labels))
|
190 |
|
@@ -193,52 +193,58 @@ class NetworkXStorage(BaseGraphStorage):
|
|
193 |
) -> KnowledgeGraph:
|
194 |
"""
|
195 |
Get complete connected subgraph for specified node (including the starting node itself)
|
196 |
-
|
197 |
Args:
|
198 |
node_label: Label of the starting node
|
199 |
max_depth: Maximum depth of the subgraph
|
200 |
-
|
201 |
Returns:
|
202 |
KnowledgeGraph object containing nodes and edges
|
203 |
"""
|
204 |
result = KnowledgeGraph()
|
205 |
seen_nodes = set()
|
206 |
seen_edges = set()
|
207 |
-
|
208 |
# Handle special case for "*" label
|
209 |
if node_label == "*":
|
210 |
# For "*", return the entire graph including all nodes and edges
|
211 |
-
subgraph = self._graph.copy()  # Create a copy to avoid modifying the original graph
|
|
|
|
|
212 |
else:
|
213 |
# Find nodes with matching node id (partial match)
|
214 |
nodes_to_explore = []
|
215 |
for n, attr in self._graph.nodes(data=True):
|
216 |
if node_label in str(n): # Use partial matching
|
217 |
nodes_to_explore.append(n)
|
218 |
-
|
219 |
if not nodes_to_explore:
|
220 |
logger.warning(f"No nodes found with label {node_label}")
|
221 |
return result
|
222 |
|
223 |
# Get subgraph using ego_graph
|
224 |
subgraph = nx.ego_graph(self._graph, nodes_to_explore[0], radius=max_depth)
|
225 |
-
|
226 |
# Check if number of nodes exceeds max_graph_nodes
|
227 |
-
max_graph_nodes=500
|
228 |
if len(subgraph.nodes()) > max_graph_nodes:
|
229 |
-
origin_nodes=len(subgraph.nodes())
|
230 |
node_degrees = dict(subgraph.degree())
|
231 |
-
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[:max_graph_nodes]
|
|
|
|
|
232 |
top_node_ids = [node[0] for node in top_nodes]
|
233 |
# Create new subgraph with only top nodes
|
234 |
subgraph = subgraph.subgraph(top_node_ids)
|
235 |
-
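logger.info(f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})")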
|
|
|
|
|
236 |
|
237 |
# Add nodes to result
|
238 |
for node in subgraph.nodes():
|
239 |
if str(node) in seen_nodes:
|
240 |
continue
|
241 |
-
|
242 |
node_data = dict(subgraph.nodes[node])
|
243 |
# Get entity_type as labels
|
244 |
labels = []
|
@@ -247,28 +253,26 @@ class NetworkXStorage(BaseGraphStorage):
|
|
247 |
labels.extend(node_data["entity_type"])
|
248 |
else:
|
249 |
labels.append(node_data["entity_type"])
|
250 |
-
|
251 |
# Create node with properties
|
252 |
node_properties = {k: v for k, v in node_data.items()}
|
253 |
|
254 |
result.nodes.append(
|
255 |
KnowledgeGraphNode(
|
256 |
-
id=str(node),
|
257 |
-
labels=[str(node)],
|
258 |
-
properties=node_properties
|
259 |
)
|
260 |
)
|
261 |
seen_nodes.add(str(node))
|
262 |
-
|
263 |
# Add edges to result
|
264 |
for edge in subgraph.edges():
|
265 |
source, target = edge
|
266 |
edge_id = f"{source}-{target}"
|
267 |
if edge_id in seen_edges:
|
268 |
continue
|
269 |
-
|
270 |
edge_data = dict(subgraph.edges[edge])
|
271 |
-
|
272 |
# Create edge with complete information
|
273 |
result.edges.append(
|
274 |
KnowledgeGraphEdge(
|
@@ -280,7 +284,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
280 |
)
|
281 |
)
|
282 |
seen_edges.add(edge_id)
|
283 |
-
|
284 |
# logger.info(result.edges)
|
285 |
|
286 |
logger.info(
|
|
|
184 |
# else:
|
185 |
# labels.add(node_data["entity_type"])
|
186 |
labels.add(str(node)) # Add node id as a label
|
187 |
+
|
188 |
# Return sorted list
|
189 |
return sorted(list(labels))
|
190 |
|
|
|
193 |
) -> KnowledgeGraph:
|
194 |
"""
|
195 |
Get complete connected subgraph for specified node (including the starting node itself)
|
196 |
+
|
197 |
Args:
|
198 |
node_label: Label of the starting node
|
199 |
max_depth: Maximum depth of the subgraph
|
200 |
+
|
201 |
Returns:
|
202 |
KnowledgeGraph object containing nodes and edges
|
203 |
"""
|
204 |
result = KnowledgeGraph()
|
205 |
seen_nodes = set()
|
206 |
seen_edges = set()
|
207 |
+
|
208 |
# Handle special case for "*" label
|
209 |
if node_label == "*":
|
210 |
# For "*", return the entire graph including all nodes and edges
|
211 |
+
subgraph = (
|
212 |
+
self._graph.copy()
|
213 |
+
) # Create a copy to avoid modifying the original graph
|
214 |
else:
|
215 |
# Find nodes with matching node id (partial match)
|
216 |
nodes_to_explore = []
|
217 |
for n, attr in self._graph.nodes(data=True):
|
218 |
if node_label in str(n): # Use partial matching
|
219 |
nodes_to_explore.append(n)
|
220 |
+
|
221 |
if not nodes_to_explore:
|
222 |
logger.warning(f"No nodes found with label {node_label}")
|
223 |
return result
|
224 |
|
225 |
# Get subgraph using ego_graph
|
226 |
subgraph = nx.ego_graph(self._graph, nodes_to_explore[0], radius=max_depth)
|
227 |
+
|
228 |
# Check if number of nodes exceeds max_graph_nodes
|
229 |
+
max_graph_nodes = 500
|
230 |
if len(subgraph.nodes()) > max_graph_nodes:
|
231 |
+
origin_nodes = len(subgraph.nodes())
|
232 |
node_degrees = dict(subgraph.degree())
|
233 |
+
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
|
234 |
+
:max_graph_nodes
|
235 |
+
]
|
236 |
top_node_ids = [node[0] for node in top_nodes]
|
237 |
# Create new subgraph with only top nodes
|
238 |
subgraph = subgraph.subgraph(top_node_ids)
|
239 |
+
logger.info(
|
240 |
+
f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})"
|
241 |
+
)
|
242 |
|
243 |
# Add nodes to result
|
244 |
for node in subgraph.nodes():
|
245 |
if str(node) in seen_nodes:
|
246 |
continue
|
247 |
+
|
248 |
node_data = dict(subgraph.nodes[node])
|
249 |
# Get entity_type as labels
|
250 |
labels = []
|
|
|
253 |
labels.extend(node_data["entity_type"])
|
254 |
else:
|
255 |
labels.append(node_data["entity_type"])
|
256 |
+
|
257 |
# Create node with properties
|
258 |
node_properties = {k: v for k, v in node_data.items()}
|
259 |
|
260 |
result.nodes.append(
|
261 |
KnowledgeGraphNode(
|
262 |
+
id=str(node), labels=[str(node)], properties=node_properties
|
|
|
|
|
263 |
)
|
264 |
)
|
265 |
seen_nodes.add(str(node))
|
266 |
+
|
267 |
# Add edges to result
|
268 |
for edge in subgraph.edges():
|
269 |
source, target = edge
|
270 |
edge_id = f"{source}-{target}"
|
271 |
if edge_id in seen_edges:
|
272 |
continue
|
273 |
+
|
274 |
edge_data = dict(subgraph.edges[edge])
|
275 |
+
|
276 |
# Create edge with complete information
|
277 |
result.edges.append(
|
278 |
KnowledgeGraphEdge(
|
|
|
284 |
)
|
285 |
)
|
286 |
seen_edges.add(edge_id)
|
287 |
+
|
288 |
# logger.info(result.edges)
|
289 |
|
290 |
logger.info(
|