yangdx committed on
Commit
2bf79e3
·
1 Parent(s): ebe8c66

Improved graph storage documentation and methods

Browse files

- Added detailed docstrings for graph methods
- Added bulk node/edge removal methods

Files changed (1) hide show
  1. lightrag/base.py +116 -16
lightrag/base.py CHANGED
@@ -12,7 +12,6 @@ from typing import (
12
  TypeVar,
13
  Callable,
14
  )
15
- import numpy as np
16
  from .utils import EmbeddingFunc
17
  from .types import KnowledgeGraph
18
 
@@ -281,63 +280,164 @@ class BaseGraphStorage(StorageNameSpace, ABC):
281
 
282
  @abstractmethod
283
  async def has_node(self, node_id: str) -> bool:
284
- """Check if an edge exists in the graph."""
 
 
 
 
 
 
 
285
 
286
  @abstractmethod
287
  async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
288
- """Get the degree of a node."""
 
 
 
 
 
 
 
 
289
 
290
  @abstractmethod
291
  async def node_degree(self, node_id: str) -> int:
292
- """Get the degree of an edge."""
 
 
 
 
 
 
 
293
 
294
  @abstractmethod
295
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
296
- """Get a node by its id."""
 
 
 
 
 
 
 
 
297
 
298
  @abstractmethod
299
  async def get_node(self, node_id: str) -> dict[str, str] | None:
300
- """Get node by its label identifier, return only node properties"""
 
 
 
 
 
 
 
301
 
302
  @abstractmethod
303
  async def get_edge(
304
  self, source_node_id: str, target_node_id: str
305
  ) -> dict[str, str] | None:
306
- """Get edge properties between two nodes"""
 
 
 
 
 
 
 
 
307
 
308
  @abstractmethod
309
  async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
310
- """Upsert a node into the graph."""
 
 
 
 
 
 
 
 
311
 
312
  @abstractmethod
313
  async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
314
- """Upsert an edge into the graph."""
 
 
 
 
 
 
 
 
 
 
315
 
316
  @abstractmethod
317
  async def upsert_edge(
318
  self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
319
  ) -> None:
320
- """Delete a node from the graph.
321
 
322
  Important notes for in-memory storage:
323
  1. Changes will be persisted to disk during the next index_done_callback
324
  2. Only one process should be updating the storage at a time before index_done_callback,
325
  KG-storage-log should be used to avoid data corruption
 
 
 
 
 
326
  """
327
 
328
  @abstractmethod
329
  async def delete_node(self, node_id: str) -> None:
330
- """Embed nodes using an algorithm."""
 
 
 
 
 
 
 
 
 
331
 
332
  @abstractmethod
333
- async def embed_nodes(
334
- self, algorithm: str
335
- ) -> tuple[np.ndarray[Any, Any], list[str]]:
336
- """Get all labels in the graph."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
  @abstractmethod
339
  async def get_all_labels(self) -> list[str]:
340
- """Get a knowledge graph of a node."""
 
 
 
 
341
 
342
  @abstractmethod
343
  async def get_knowledge_graph(
 
12
  TypeVar,
13
  Callable,
14
  )
 
15
  from .utils import EmbeddingFunc
16
  from .types import KnowledgeGraph
17
 
 
280
 
281
  @abstractmethod
282
  async def has_node(self, node_id: str) -> bool:
283
+ """Check if a node exists in the graph.
284
+
285
+ Args:
286
+ node_id: The ID of the node to check
287
+
288
+ Returns:
289
+ True if the node exists, False otherwise
290
+ """
291
 
292
  @abstractmethod
293
  async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
294
+ """Check if an edge exists between two nodes.
295
+
296
+ Args:
297
+ source_node_id: The ID of the source node
298
+ target_node_id: The ID of the target node
299
+
300
+ Returns:
301
+ True if the edge exists, False otherwise
302
+ """
303
 
304
  @abstractmethod
305
  async def node_degree(self, node_id: str) -> int:
306
+ """Get the degree (number of connected edges) of a node.
307
+
308
+ Args:
309
+ node_id: The ID of the node
310
+
311
+ Returns:
312
+ The number of edges connected to the node
313
+ """
314
 
315
  @abstractmethod
316
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
317
+ """Get the total degree of an edge (sum of degrees of its source and target nodes).
318
+
319
+ Args:
320
+ src_id: The ID of the source node
321
+ tgt_id: The ID of the target node
322
+
323
+ Returns:
324
+ The sum of the degrees of the source and target nodes
325
+ """
326
 
327
  @abstractmethod
328
  async def get_node(self, node_id: str) -> dict[str, str] | None:
329
+ """Get node by its ID, returning only node properties.
330
+
331
+ Args:
332
+ node_id: The ID of the node to retrieve
333
+
334
+ Returns:
335
+ A dictionary of node properties if found, None otherwise
336
+ """
337
 
338
  @abstractmethod
339
  async def get_edge(
340
  self, source_node_id: str, target_node_id: str
341
  ) -> dict[str, str] | None:
342
+ """Get edge properties between two nodes.
343
+
344
+ Args:
345
+ source_node_id: The ID of the source node
346
+ target_node_id: The ID of the target node
347
+
348
+ Returns:
349
+ A dictionary of edge properties if found, None otherwise
350
+ """
351
 
352
  @abstractmethod
353
  async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
354
+ """Get all edges connected to a node.
355
+
356
+ Args:
357
+ source_node_id: The ID of the node to get edges for
358
+
359
+ Returns:
360
+ A list of (source_id, target_id) tuples representing edges,
361
+ or None if the node doesn't exist
362
+ """
363
 
364
  @abstractmethod
365
  async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
366
+ """Insert a new node or update an existing node in the graph.
367
+
368
+ Important notes for in-memory storage:
369
+ 1. Changes will be persisted to disk during the next index_done_callback
370
+ 2. Only one process should be updating the storage at a time before index_done_callback,
371
+ KG-storage-log should be used to avoid data corruption
372
+
373
+ Args:
374
+ node_id: The ID of the node to insert or update
375
+ node_data: A dictionary of node properties
376
+ """
377
 
378
  @abstractmethod
379
  async def upsert_edge(
380
  self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
381
  ) -> None:
382
+ """Insert a new edge or update an existing edge in the graph.
383
 
384
  Important notes for in-memory storage:
385
  1. Changes will be persisted to disk during the next index_done_callback
386
  2. Only one process should be updating the storage at a time before index_done_callback,
387
  KG-storage-log should be used to avoid data corruption
388
+
389
+ Args:
390
+ source_node_id: The ID of the source node
391
+ target_node_id: The ID of the target node
392
+ edge_data: A dictionary of edge properties
393
  """
394
 
395
  @abstractmethod
396
  async def delete_node(self, node_id: str) -> None:
397
+ """Delete a node from the graph.
398
+
399
+ Important notes for in-memory storage:
400
+ 1. Changes will be persisted to disk during the next index_done_callback
401
+ 2. Only one process should be updating the storage at a time before index_done_callback,
402
+ KG-storage-log should be used to avoid data corruption
403
+
404
+ Args:
405
+ node_id: The ID of the node to delete
406
+ """
407
 
408
  @abstractmethod
409
+ async def remove_nodes(self, nodes: list[str]):
410
+ """Delete multiple nodes
411
+
412
+ Important notes:
413
+ 1. Changes will be persisted to disk during the next index_done_callback
414
+ 2. Only one process should be updating the storage at a time before index_done_callback,
415
+ KG-storage-log should be used to avoid data corruption
416
+
417
+ Args:
418
+ nodes: List of node IDs to be deleted
419
+ """
420
+
421
+ @abstractmethod
422
+ async def remove_edges(self, edges: list[tuple[str, str]]):
423
+ """Delete multiple edges
424
+
425
+ Important notes:
426
+ 1. Changes will be persisted to disk during the next index_done_callback
427
+ 2. Only one process should be updating the storage at a time before index_done_callback,
428
+ KG-storage-log should be used to avoid data corruption
429
+
430
+ Args:
431
+ edges: List of edges to be deleted, each edge is a (source, target) tuple
432
+ """
433
 
434
  @abstractmethod
435
  async def get_all_labels(self) -> list[str]:
436
+ """Get all labels in the graph.
437
+
438
+ Returns:
439
+ A list of all node labels in the graph, sorted alphabetically
440
+ """
441
 
442
  @abstractmethod
443
  async def get_knowledge_graph(