yangdx committed on
Commit
b099a36
·
2 Parent(s): 36d56dc 4dad7b8

Merge branch 'postgres-doc-ids-filter-fix'

Browse files
Files changed (1) hide show
  1. lightrag/kg/postgres_impl.py +13 -19
lightrag/kg/postgres_impl.py CHANGED
@@ -649,17 +649,11 @@ class PGVectorStorage(BaseVectorStorage):
649
  embeddings = await self.embedding_func([query])
650
  embedding = embeddings[0]
651
  embedding_string = ",".join(map(str, embedding))
652
-
653
- if ids:
654
- formatted_ids = ",".join(f"'{id}'" for id in ids)
655
- else:
656
- formatted_ids = "NULL"
657
-
658
- sql = SQL_TEMPLATES[self.namespace].format(
659
- embedding_string=embedding_string, doc_ids=formatted_ids
660
- )
661
  params = {
662
  "workspace": self.db.workspace,
 
663
  "better_than_threshold": self.cosine_better_than_threshold,
664
  "top_k": top_k,
665
  }
@@ -2137,7 +2131,7 @@ SQL_TEMPLATES = {
2137
  WITH relevant_chunks AS (
2138
  SELECT id as chunk_id
2139
  FROM LIGHTRAG_DOC_CHUNKS
2140
- WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
2141
  )
2142
  SELECT source_id as src_id, target_id as tgt_id
2143
  FROM (
@@ -2146,15 +2140,15 @@ SQL_TEMPLATES = {
2146
  JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
2147
  WHERE r.workspace=$1
2148
  ) filtered
2149
- WHERE distance>$2
2150
  ORDER BY distance DESC
2151
- LIMIT $3
2152
  """,
2153
  "entities": """
2154
  WITH relevant_chunks AS (
2155
  SELECT id as chunk_id
2156
  FROM LIGHTRAG_DOC_CHUNKS
2157
- WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
2158
  )
2159
  SELECT entity_name FROM
2160
  (
@@ -2163,26 +2157,26 @@ SQL_TEMPLATES = {
2163
  JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
2164
  WHERE e.workspace=$1
2165
  ) as chunk_distances
2166
- WHERE distance>$2
2167
  ORDER BY distance DESC
2168
- LIMIT $3
2169
  """,
2170
  "chunks": """
2171
  WITH relevant_chunks AS (
2172
  SELECT id as chunk_id
2173
  FROM LIGHTRAG_DOC_CHUNKS
2174
- WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
2175
  )
2176
  SELECT id, content, file_path FROM
2177
  (
2178
  SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
2179
  FROM LIGHTRAG_DOC_CHUNKS
2180
- where workspace=$1
2181
  AND id IN (SELECT chunk_id FROM relevant_chunks)
2182
  ) as chunk_distances
2183
- WHERE distance>$2
2184
  ORDER BY distance DESC
2185
- LIMIT $3
2186
  """,
2187
  # DROP tables
2188
  "drop_specifiy_table_workspace": """
 
649
  embeddings = await self.embedding_func([query])
650
  embedding = embeddings[0]
651
  embedding_string = ",".join(map(str, embedding))
652
+ # Use parameterized document IDs (None means search across all documents)
653
+ sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
 
 
 
 
 
 
 
654
  params = {
655
  "workspace": self.db.workspace,
656
+ "doc_ids": ids,
657
  "better_than_threshold": self.cosine_better_than_threshold,
658
  "top_k": top_k,
659
  }
 
2131
  WITH relevant_chunks AS (
2132
  SELECT id as chunk_id
2133
  FROM LIGHTRAG_DOC_CHUNKS
2134
+ WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
2135
  )
2136
  SELECT source_id as src_id, target_id as tgt_id
2137
  FROM (
 
2140
  JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
2141
  WHERE r.workspace=$1
2142
  ) filtered
2143
+ WHERE distance>$3
2144
  ORDER BY distance DESC
2145
+ LIMIT $4
2146
  """,
2147
  "entities": """
2148
  WITH relevant_chunks AS (
2149
  SELECT id as chunk_id
2150
  FROM LIGHTRAG_DOC_CHUNKS
2151
+ WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
2152
  )
2153
  SELECT entity_name FROM
2154
  (
 
2157
  JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
2158
  WHERE e.workspace=$1
2159
  ) as chunk_distances
2160
+ WHERE distance>$3
2161
  ORDER BY distance DESC
2162
+ LIMIT $4
2163
  """,
2164
  "chunks": """
2165
  WITH relevant_chunks AS (
2166
  SELECT id as chunk_id
2167
  FROM LIGHTRAG_DOC_CHUNKS
2168
+ WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
2169
  )
2170
  SELECT id, content, file_path FROM
2171
  (
2172
  SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
2173
  FROM LIGHTRAG_DOC_CHUNKS
2174
+ WHERE workspace=$1
2175
  AND id IN (SELECT chunk_id FROM relevant_chunks)
2176
  ) as chunk_distances
2177
+ WHERE distance>$3
2178
  ORDER BY distance DESC
2179
+ LIMIT $4
2180
  """,
2181
  # DROP tables
2182
  "drop_specifiy_table_workspace": """