Debug
Browse files- lightrag/kg/postgres_impl.py +20 -10
lightrag/kg/postgres_impl.py
CHANGED
@@ -438,16 +438,26 @@ class PGVectorStorage(BaseVectorStorage):
|
|
438 |
@dataclass
|
439 |
class PGDocStatusStorage(DocStatusStorage):
|
440 |
async def filter_keys(self, keys: set[str]) -> set[str]:
|
441 |
-
"""
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
453 |
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
|
|
|
438 |
@dataclass
|
439 |
class PGDocStatusStorage(DocStatusStorage):
|
440 |
async def filter_keys(self, keys: set[str]) -> set[str]:
|
441 |
+
"""Filter out duplicated content"""
|
442 |
+
sql = SQL_TEMPLATES["filter_keys"].format(
|
443 |
+
table_name=namespace_to_table_name(self.namespace),
|
444 |
+
ids=",".join([f"'{id}'" for id in keys]),
|
445 |
+
)
|
446 |
+
params = {"workspace": self.db.workspace}
|
447 |
+
try:
|
448 |
+
res = await self.db.query(sql, params, multirows=True)
|
449 |
+
if res:
|
450 |
+
exist_keys = [key["id"] for key in res]
|
451 |
+
else:
|
452 |
+
exist_keys = []
|
453 |
+
new_keys = set([s for s in keys if s not in exist_keys])
|
454 |
+
print(f"keys: {keys}")
|
455 |
+
print(f"new_keys: {new_keys}")
|
456 |
+
return new_keys
|
457 |
+
except Exception as e:
|
458 |
+
logger.error(f"PostgreSQL database error: {e}")
|
459 |
+
print(sql)
|
460 |
+
print(params)
|
461 |
|
462 |
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
463 |
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
|