yangdx committed
Commit · 25287b8 · Parent(s): 738c425

Fix linting
lightrag/kg/faiss_impl.py
CHANGED
@@ -17,6 +17,7 @@ if not pm.is_installed("faiss"):
 import faiss  # type: ignore
 from threading import Lock as ThreadLock
 
+
 @final
 @dataclass
 class FaissVectorDBStorage(BaseVectorStorage):
@@ -59,7 +60,6 @@ class FaissVectorDBStorage(BaseVectorStorage):
         with self._storage_lock:
             self._load_faiss_index()
 
-
     def _get_index(self):
         """Check if the shtorage should be reloaded"""
         return self._index
@@ -224,10 +224,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
         logger.debug(f"Searching relations for entity {entity_name}")
         relations = []
         for fid, meta in self._id_to_meta.items():
-            if (
-                meta.get("src_id") == entity_name
-                or meta.get("tgt_id") == entity_name
-            ):
+            if meta.get("src_id") == entity_name or meta.get("tgt_id") == entity_name:
                 relations.append(fid)
 
         logger.debug(f"Found {len(relations)} relations for {entity_name}")
@@ -265,7 +262,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
             new_id_to_meta[new_fid] = vec_meta
 
         with self._storage_lock:
-
+            # Re-init index
             self._index = faiss.IndexFlatIP(self._dim)
             if vectors_to_keep:
                 arr = np.array(vectors_to_keep, dtype=np.float32)
@@ -273,7 +270,6 @@ class FaissVectorDBStorage(BaseVectorStorage):
 
             self._id_to_meta = new_id_to_meta
 
-
     def _save_faiss_index(self):
         """
         Save the current Faiss index + metadata to disk so it can persist across runs.
@@ -290,7 +286,6 @@ class FaissVectorDBStorage(BaseVectorStorage):
         with open(self._meta_file, "w", encoding="utf-8") as f:
             json.dump(serializable_dict, f)
 
-
     def _load_faiss_index(self):
         """
         Load the Faiss index + metadata from disk if it exists,
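For context, the "# Re-init index" hunk sits in FaissVectorDBStorage's rebuild-on-delete path: the class recreates a flat inner-product index and re-adds the surviving vectors under the storage lock rather than removing vectors in place. A minimal standalone sketch of that pattern follows; the dimension and sample vectors are made up, while IndexFlatIP, add, and ntotal are the real faiss API.

    # Sketch of the rebuild-on-delete pattern shown in the diff above.
    # `dim` and `vectors_to_keep` are invented sample data.
    import numpy as np
    import faiss
    from threading import Lock as ThreadLock

    dim = 4
    storage_lock = ThreadLock()
    vectors_to_keep = [np.random.rand(dim).astype(np.float32) for _ in range(3)]

    with storage_lock:
        # Re-init index: a fresh flat index using the inner-product metric
        index = faiss.IndexFlatIP(dim)
        if vectors_to_keep:
            arr = np.array(vectors_to_keep, dtype=np.float32)
            index.add(arr)  # re-add all surviving vectors in one batch

    print(index.ntotal)  # -> 3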
lightrag/kg/json_doc_status_impl.py
CHANGED
@@ -84,7 +84,9 @@ class JsonDocStatusStorage(DocStatusStorage):
 
     async def index_done_callback(self) -> None:
         with self._storage_lock:
-            data_dict = dict(self._data) if hasattr(self._data, "_getvalue") else self._data
+            data_dict = (
+                dict(self._data) if hasattr(self._data, "_getvalue") else self._data
+            )
             write_json(data_dict, self._file_name)
 
     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
lightrag/kg/json_kv_impl.py
CHANGED
@@ -36,7 +36,9 @@ class JsonKVStorage(BaseKVStorage):
 
     async def index_done_callback(self) -> None:
         with self._storage_lock:
-            data_dict = dict(self._data) if hasattr(self._data, "_getvalue") else self._data
+            data_dict = (
+                dict(self._data) if hasattr(self._data, "_getvalue") else self._data
+            )
             write_json(data_dict, self._file_name)
 
     async def get_by_id(self, id: str) -> dict[str, Any] | None:
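The hasattr(self._data, "_getvalue") guard reflowed in both JSON hunks exists because, in multi-process mode, self._data can be a multiprocessing Manager proxy rather than a plain dict, and json.dump cannot serialize a proxy. A minimal sketch of the conversion; the sample data is made up, while DictProxy and its _getvalue method are real multiprocessing behavior.

    # Manager dicts are DictProxy objects: they expose _getvalue() and
    # are not JSON-serializable, so they must be copied to a plain dict.
    import json
    from multiprocessing import Manager

    manager = Manager()
    shared = manager.dict({"doc-1": {"status": "processed"}})  # DictProxy

    data_dict = dict(shared) if hasattr(shared, "_getvalue") else shared
    print(json.dumps(data_dict))  # works: data_dict is now a plain dict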
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -18,6 +18,7 @@ if not pm.is_installed("nano-vectordb"):
 from nano_vectordb import NanoVectorDB
 from threading import Lock as ThreadLock
 
+
 @final
 @dataclass
 class NanoVectorDBStorage(BaseVectorStorage):
@@ -148,9 +149,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
                 for dp in storage["data"]
                 if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
             ]
-            logger.debug(
-                f"Found {len(relations)} relations for entity {entity_name}"
-            )
+            logger.debug(f"Found {len(relations)} relations for entity {entity_name}")
             ids_to_delete = [relation["__id__"] for relation in relations]
 
             if ids_to_delete:
lightrag/kg/networkx_impl.py
CHANGED
@@ -19,6 +19,7 @@ import networkx as nx
 from graspologic import embed
 from threading import Lock as ThreadLock
 
+
 @final
 @dataclass
 class NetworkXStorage(BaseGraphStorage):
@@ -231,9 +232,9 @@ class NetworkXStorage(BaseGraphStorage):
         if len(subgraph.nodes()) > max_graph_nodes:
             origin_nodes = len(subgraph.nodes())
             node_degrees = dict(subgraph.degree())
-            top_nodes = sorted(
-                node_degrees.items(), key=lambda x: x[1], reverse=True
-            )[:max_graph_nodes]
+            top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
+                :max_graph_nodes
+            ]
             top_node_ids = [node[0] for node in top_nodes]
             # Create new subgraph with only top nodes
             subgraph = subgraph.subgraph(top_node_ids)
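The reflowed top_nodes expression is a plain top-N selection: sort nodes by degree, descending, then slice off the first max_graph_nodes. A minimal sketch of the same idiom on a toy graph; the star graph and the cap of 2 are invented sample inputs.

    # Keep only the max_graph_nodes highest-degree nodes of a graph.
    import networkx as nx

    max_graph_nodes = 2
    graph = nx.star_graph(4)  # node 0 is the hub with degree 4

    node_degrees = dict(graph.degree())
    top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
        :max_graph_nodes
    ]
    top_node_ids = [node[0] for node in top_nodes]
    subgraph = graph.subgraph(top_node_ids)
    print(top_node_ids)  # hub node 0 first, then one degree-1 leaf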
lightrag/kg/shared_storage.py
CHANGED
@@ -26,6 +26,7 @@ _global_lock: Optional[LockType] = None
 _shared_dicts: Optional[Dict[str, Any]] = None
 _init_flags: Optional[Dict[str, bool]] = None  # namespace -> initialized
 
+
 def initialize_share_data(workers: int = 1):
     """
     Initialize shared storage data for single or multi-process mode.
@@ -66,9 +67,7 @@ def initialize_share_data(workers: int = 1):
         is_multiprocess = True
         _global_lock = _manager.Lock()
         _shared_dicts = _manager.dict()
-        _init_flags = (
-            _manager.dict()
-        )
+        _init_flags = _manager.dict()
         direct_log(
             f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
         )
@@ -95,9 +94,13 @@ def try_initialize_namespace(namespace: str) -> bool:
 
     if namespace not in _init_flags:
         _init_flags[namespace] = True
-        direct_log(f"Process {os.getpid()} ready to initialize storage namespace: [{namespace}]")
+        direct_log(
+            f"Process {os.getpid()} ready to initialize storage namespace: [{namespace}]"
+        )
         return True
-    direct_log(f"Process {os.getpid()} storage namespace already to initialized: [{namespace}]")
+    direct_log(
+        f"Process {os.getpid()} storage namespace already to initialized: [{namespace}]"
+    )
     return False
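Both shared_storage hunks are pure reflows, but the surrounding code is worth a note: in multi-process mode the module swaps its plain module-level globals for multiprocessing.Manager proxies so every worker process sees the same lock, storage dicts, and namespace flags. A minimal sketch of that setup; the names mirror the diff, and the "kv_store" namespace is an invented example.

    # One Manager serves proxy objects that all worker processes share.
    from multiprocessing import Manager

    _manager = Manager()
    _global_lock = _manager.Lock()   # one lock shared across processes
    _shared_dicts = _manager.dict()  # proxy-backed storage dict
    _init_flags = _manager.dict()    # namespace -> initialized flag

    # First caller wins, mirroring try_initialize_namespace's contract.
    namespace = "kv_store"
    if namespace not in _init_flags:
        _init_flags[namespace] = True
        initialized_here = True
    else:
        initialized_here = False
    print(initialized_here)  # True only for the first call per namespace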