ParisNeo commited on
Commit
2677ec6
·
unverified ·
1 Parent(s): 44713ad
Files changed (1) hide show
  1. lightrag/kg/postgres_impl.py +20 -10
lightrag/kg/postgres_impl.py CHANGED
@@ -438,16 +438,26 @@ class PGVectorStorage(BaseVectorStorage):
438
  @dataclass
439
  class PGDocStatusStorage(DocStatusStorage):
440
  async def filter_keys(self, keys: set[str]) -> set[str]:
441
- """Return keys that don't exist in storage"""
442
- keys = ",".join([f"'{_id}'" for _id in keys])
443
- sql = f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace='{self.db.workspace}' AND id IN ({keys})"
444
- result = await self.db.query(sql, multirows=True)
445
- # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
446
- if result is None:
447
- return set(keys)
448
- else:
449
- existed = set([element["id"] for element in result])
450
- return set(keys) - existed
 
 
 
 
 
 
 
 
 
 
451
 
452
  async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
453
  sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
 
438
  @dataclass
439
  class PGDocStatusStorage(DocStatusStorage):
440
  async def filter_keys(self, keys: set[str]) -> set[str]:
441
+ """Filter out duplicated content"""
442
+ sql = SQL_TEMPLATES["filter_keys"].format(
443
+ table_name=namespace_to_table_name(self.namespace),
444
+ ids=",".join([f"'{id}'" for id in keys]),
445
+ )
446
+ params = {"workspace": self.db.workspace}
447
+ try:
448
+ res = await self.db.query(sql, params, multirows=True)
449
+ if res:
450
+ exist_keys = [key["id"] for key in res]
451
+ else:
452
+ exist_keys = []
453
+ new_keys = set([s for s in keys if s not in exist_keys])
454
+ print(f"keys: {keys}")
455
+ print(f"new_keys: {new_keys}")
456
+ return new_keys
457
+ except Exception as e:
458
+ logger.error(f"PostgreSQL database error: {e}")
459
+ print(sql)
460
+ print(params)
461
 
462
  async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
463
  sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"