Spaces:

rm-lht
/

lightrag

Configuration error

yangdx committed on Feb 12

Commit

b89f76b

1 Parent(s): bfe6274

Fix linting

Browse files

Files changed (11) hide show

lightrag/api/lightrag_server.py +22 -7
lightrag/kg/chroma_impl.py +3 -2
lightrag/kg/faiss_impl.py +3 -1
lightrag/kg/milvus_impl.py +7 -2
lightrag/kg/nano_vector_db_impl.py +3 -1
lightrag/kg/oracle_impl.py +3 -2
lightrag/kg/postgres_impl.py +4 -4
lightrag/kg/qdrant_impl.py +9 -3
lightrag/kg/tidb_impl.py +3 -1
lightrag/lightrag.py +1 -3
lightrag/operate.py +6 -4

lightrag/api/lightrag_server.py CHANGED Viewed

@@ -66,12 +66,14 @@ load_dotenv(override=True)
 config = configparser.ConfigParser()
 config.read("config.ini")
 class DefaultRAGStorageConfig:
     KV_STORAGE = "JsonKVStorage"
     VECTOR_STORAGE = "NanoVectorDBStorage"
     GRAPH_STORAGE = "NetworkXStorage"
     DOC_STATUS_STORAGE = "JsonDocStatusStorage"
 # Global progress tracker
 scan_progress: Dict = {
     "is_scanning": False,
@@ -317,22 +319,30 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--kv-storage",
-        default=get_env_value("LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE),
         help=f"KV存储实现 (default: {DefaultRAGStorageConfig.KV_STORAGE})",
     )
     parser.add_argument(
         "--doc-status-storage",
-        default=get_env_value("LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE),
         help=f"文档状态存储实现 (default: {DefaultRAGStorageConfig.DOC_STATUS_STORAGE})",
     )
     parser.add_argument(
         "--graph-storage",
-        default=get_env_value("LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE),
         help=f"图存储实现 (default: {DefaultRAGStorageConfig.GRAPH_STORAGE})",
     )
     parser.add_argument(
         "--vector-storage",
-        default=get_env_value("LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE),
         help=f"向量存储实现 (default: {DefaultRAGStorageConfig.VECTOR_STORAGE})",
     )
@@ -725,7 +735,12 @@ def create_app(args):
                 for storage_name, storage_instance in storage_instances:
                     if isinstance(
                         storage_instance,
-                        (PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage),
                     ):
                         storage_instance.db = postgres_db
                         logger.info(f"Injected postgres_db to {storage_name}")
@@ -790,11 +805,11 @@ def create_app(args):
             if postgres_db and hasattr(postgres_db, "pool"):
                 await postgres_db.pool.close()
                 logger.info("Closed PostgreSQL connection pool")
             if oracle_db and hasattr(oracle_db, "pool"):
                 await oracle_db.pool.close()
                 logger.info("Closed Oracle connection pool")
             if tidb_db and hasattr(tidb_db, "pool"):
                 await tidb_db.pool.close()
                 logger.info("Closed TiDB connection pool")

 config = configparser.ConfigParser()
 config.read("config.ini")
 class DefaultRAGStorageConfig:
     KV_STORAGE = "JsonKVStorage"
     VECTOR_STORAGE = "NanoVectorDBStorage"
     GRAPH_STORAGE = "NetworkXStorage"
     DOC_STATUS_STORAGE = "JsonDocStatusStorage"
 # Global progress tracker
 scan_progress: Dict = {
     "is_scanning": False,
     parser.add_argument(
         "--kv-storage",
+        default=get_env_value(
+            "LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE
+        ),
         help=f"KV存储实现 (default: {DefaultRAGStorageConfig.KV_STORAGE})",
     )
     parser.add_argument(
         "--doc-status-storage",
+        default=get_env_value(
+            "LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE
+        ),
         help=f"文档状态存储实现 (default: {DefaultRAGStorageConfig.DOC_STATUS_STORAGE})",
     )
     parser.add_argument(
         "--graph-storage",
+        default=get_env_value(
+            "LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE
+        ),
         help=f"图存储实现 (default: {DefaultRAGStorageConfig.GRAPH_STORAGE})",
     )
     parser.add_argument(
         "--vector-storage",
+        default=get_env_value(
+            "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
+        ),
         help=f"向量存储实现 (default: {DefaultRAGStorageConfig.VECTOR_STORAGE})",
     )
                 for storage_name, storage_instance in storage_instances:
                     if isinstance(
                         storage_instance,
+                        (
+                            PGKVStorage,
+                            PGVectorStorage,
+                            PGGraphStorage,
+                            PGDocStatusStorage,
+                        ),
                     ):
                         storage_instance.db = postgres_db
                         logger.info(f"Injected postgres_db to {storage_name}")
             if postgres_db and hasattr(postgres_db, "pool"):
                 await postgres_db.pool.close()
                 logger.info("Closed PostgreSQL connection pool")
             if oracle_db and hasattr(oracle_db, "pool"):
                 await oracle_db.pool.close()
                 logger.info("Closed Oracle connection pool")
             if tidb_db and hasattr(tidb_db, "pool"):
                 await tidb_db.pool.close()
                 logger.info("Closed TiDB connection pool")

lightrag/kg/chroma_impl.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import os
 import asyncio
 from dataclasses import dataclass
 from typing import Union
@@ -20,7 +19,9 @@ class ChromaVectorDBStorage(BaseVectorStorage):
             config = self.global_config.get("vector_db_storage_cls_kwargs", {})
             cosine_threshold = config.get("cosine_better_than_threshold")
             if cosine_threshold is None:
-                raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
             self.cosine_better_than_threshold = cosine_threshold
             user_collection_settings = config.get("collection_settings", {})

 import asyncio
 from dataclasses import dataclass
 from typing import Union
             config = self.global_config.get("vector_db_storage_cls_kwargs", {})
             cosine_threshold = config.get("cosine_better_than_threshold")
             if cosine_threshold is None:
+                raise ValueError(
+                    "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+                )
             self.cosine_better_than_threshold = cosine_threshold
             user_collection_settings = config.get("collection_settings", {})

lightrag/kg/faiss_impl.py CHANGED Viewed

@@ -30,7 +30,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
         # Where to save index file if you want persistent storage

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
         # Where to save index file if you want persistent storage

lightrag/kg/milvus_impl.py CHANGED Viewed

@@ -35,7 +35,9 @@ class MilvusVectorDBStorge(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
         self._client = MilvusClient(
@@ -111,7 +113,10 @@ class MilvusVectorDBStorge(BaseVectorStorage):
             data=embedding,
             limit=top_k,
             output_fields=list(self.meta_fields),
-            search_params={"metric_type": "COSINE", "params": {"radius": self.cosine_better_than_threshold}},
         )
         print(results)
         return [

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
         self._client = MilvusClient(
             data=embedding,
             limit=top_k,
             output_fields=list(self.meta_fields),
+            search_params={
+                "metric_type": "COSINE",
+                "params": {"radius": self.cosine_better_than_threshold},
+            },
         )
         print(results)
         return [

lightrag/kg/nano_vector_db_impl.py CHANGED Viewed

@@ -82,7 +82,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
         self._client_file_name = os.path.join(

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
         self._client_file_name = os.path.join(

lightrag/kg/oracle_impl.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import array
 import asyncio
-import os
 # import html
 # import os
@@ -326,7 +325,9 @@ class OracleVectorDBStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
     async def upsert(self, data: dict[str, dict]):

 import array
 import asyncio
 # import html
 # import os
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
     async def upsert(self, data: dict[str, dict]):

lightrag/kg/postgres_impl.py CHANGED Viewed

@@ -306,7 +306,9 @@ class PGVectorStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
     def _upsert_chunks(self, item: dict):
@@ -424,9 +426,7 @@ class PGDocStatusStorage(DocStatusStorage):
     async def filter_keys(self, data: set[str]) -> set[str]:
         """Return keys that don't exist in storage"""
         keys = ",".join([f"'{_id}'" for _id in data])
-        sql = (
-            f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace='{self.db.workspace}' AND id IN ({keys})"
-        )
         result = await self.db.query(sql, multirows=True)
         # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
         if result is None:

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
     def _upsert_chunks(self, item: dict):
     async def filter_keys(self, data: set[str]) -> set[str]:
         """Return keys that don't exist in storage"""
         keys = ",".join([f"'{_id}'" for _id in data])
+        sql = f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace='{self.db.workspace}' AND id IN ({keys})"
         result = await self.db.query(sql, multirows=True)
         # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
         if result is None:

lightrag/kg/qdrant_impl.py CHANGED Viewed

@@ -64,7 +64,9 @@ class QdrantVectorDBStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
         self._client = QdrantClient(
@@ -140,5 +142,9 @@ class QdrantVectorDBStorage(BaseVectorStorage):
         )
         logger.debug(f"query result: {results}")
         # 添加余弦相似度过滤
-        filtered_results = [dp for dp in results if dp.score >= self.cosine_better_than_threshold]
-        return [{**dp.payload, "id": dp.id, "distance": dp.score} for dp in filtered_results]

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
         self._client = QdrantClient(
         )
         logger.debug(f"query result: {results}")
         # 添加余弦相似度过滤
+        filtered_results = [
+            dp for dp in results if dp.score >= self.cosine_better_than_threshold
+        ]
+        return [
+            {**dp.payload, "id": dp.id, "distance": dp.score} for dp in filtered_results
+        ]

lightrag/kg/tidb_impl.py CHANGED Viewed

@@ -222,7 +222,9 @@ class TiDBVectorDBStorage(BaseVectorStorage):
         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
-            raise ValueError("cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs")
         self.cosine_better_than_threshold = cosine_threshold
     async def query(self, query: str, top_k: int) -> list[dict]:

         config = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = config.get("cosine_better_than_threshold")
         if cosine_threshold is None:
+            raise ValueError(
+                "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
+            )
         self.cosine_better_than_threshold = cosine_threshold
     async def query(self, query: str, top_k: int) -> list[dict]:

lightrag/lightrag.py CHANGED Viewed

@@ -426,7 +426,7 @@ class LightRAG:
         }
         self.vector_db_storage_cls_kwargs = {
             **default_vector_db_kwargs,
-            **self.vector_db_storage_cls_kwargs
         }
         # show config
@@ -532,8 +532,6 @@ class LightRAG:
                 embedding_func=self.embedding_func,
             )
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
             partial(
                 self.llm_model_func,

         }
         self.vector_db_storage_cls_kwargs = {
             **default_vector_db_kwargs,
+            **self.vector_db_storage_cls_kwargs,
         }
         # show config
                 embedding_func=self.embedding_func,
             )
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
             partial(
                 self.llm_model_func,

lightrag/operate.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import asyncio
 import json
-import os
 import re
 from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, Union
@@ -35,7 +34,6 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
 import time
 def chunking_by_token_size(
     content: str,
     split_by_character: Union[str, None] = None,
@@ -1057,7 +1055,9 @@ async def _get_node_data(
     query_param: QueryParam,
 ):
     # get similar entities
-    logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}")
     results = await entities_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
         return "", "", ""
@@ -1273,7 +1273,9 @@ async def _get_edge_data(
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
 ):
-    logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}")
     results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
     if not len(results):

 import asyncio
 import json
 import re
 from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, Union
 import time
 def chunking_by_token_size(
     content: str,
     split_by_character: Union[str, None] = None,
     query_param: QueryParam,
 ):
     # get similar entities
+    logger.info(
+        f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}"
+    )
     results = await entities_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
         return "", "", ""
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
 ):
+    logger.info(
+        f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
+    )
     results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
     if not len(results):