Merge branch 'postgres-doc-ids-filter-fix'
Browse files- lightrag/kg/postgres_impl.py +13 -19
lightrag/kg/postgres_impl.py
CHANGED
@@ -649,17 +649,11 @@ class PGVectorStorage(BaseVectorStorage):
|
|
649 |
embeddings = await self.embedding_func([query])
|
650 |
embedding = embeddings[0]
|
651 |
embedding_string = ",".join(map(str, embedding))
|
652 |
-
|
653 |
-
if ids:
|
654 |
-
formatted_ids = ",".join(f"'{id}'" for id in ids)
|
655 |
-
else:
|
656 |
-
formatted_ids = "NULL"
|
657 |
-
|
658 |
-
sql = SQL_TEMPLATES[self.namespace].format(
|
659 |
-
embedding_string=embedding_string, doc_ids=formatted_ids
|
660 |
-
)
|
661 |
params = {
|
662 |
"workspace": self.db.workspace,
|
|
|
663 |
"better_than_threshold": self.cosine_better_than_threshold,
|
664 |
"top_k": top_k,
|
665 |
}
|
@@ -2137,7 +2131,7 @@ SQL_TEMPLATES = {
|
|
2137 |
WITH relevant_chunks AS (
|
2138 |
SELECT id as chunk_id
|
2139 |
FROM LIGHTRAG_DOC_CHUNKS
|
2140 |
-
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
|
2141 |
)
|
2142 |
SELECT source_id as src_id, target_id as tgt_id
|
2143 |
FROM (
|
@@ -2146,15 +2140,15 @@ SQL_TEMPLATES = {
|
|
2146 |
JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
|
2147 |
WHERE r.workspace=$1
|
2148 |
) filtered
|
2149 |
-
WHERE distance>$2
|
2150 |
ORDER BY distance DESC
|
2151 |
-
LIMIT $3
|
2152 |
""",
|
2153 |
"entities": """
|
2154 |
WITH relevant_chunks AS (
|
2155 |
SELECT id as chunk_id
|
2156 |
FROM LIGHTRAG_DOC_CHUNKS
|
2157 |
-
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
|
2158 |
)
|
2159 |
SELECT entity_name FROM
|
2160 |
(
|
@@ -2163,26 +2157,26 @@ SQL_TEMPLATES = {
|
|
2163 |
JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
|
2164 |
WHERE e.workspace=$1
|
2165 |
) as chunk_distances
|
2166 |
-
WHERE distance>$2
|
2167 |
ORDER BY distance DESC
|
2168 |
-
LIMIT $3
|
2169 |
""",
|
2170 |
"chunks": """
|
2171 |
WITH relevant_chunks AS (
|
2172 |
SELECT id as chunk_id
|
2173 |
FROM LIGHTRAG_DOC_CHUNKS
|
2174 |
-
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}])
|
2175 |
)
|
2176 |
SELECT id, content, file_path FROM
|
2177 |
(
|
2178 |
SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
|
2179 |
FROM LIGHTRAG_DOC_CHUNKS
|
2180 |
-
|
2181 |
AND id IN (SELECT chunk_id FROM relevant_chunks)
|
2182 |
) as chunk_distances
|
2183 |
-
WHERE distance>$2
|
2184 |
ORDER BY distance DESC
|
2185 |
-
LIMIT $3
|
2186 |
""",
|
2187 |
# DROP tables
|
2188 |
"drop_specifiy_table_workspace": """
|
|
|
649 |
embeddings = await self.embedding_func([query])
|
650 |
embedding = embeddings[0]
|
651 |
embedding_string = ",".join(map(str, embedding))
|
652 |
+
# Use parameterized document IDs (None means search across all documents)
|
653 |
+
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
654 |
params = {
|
655 |
"workspace": self.db.workspace,
|
656 |
+
"doc_ids": ids,
|
657 |
"better_than_threshold": self.cosine_better_than_threshold,
|
658 |
"top_k": top_k,
|
659 |
}
|
|
|
2131 |
WITH relevant_chunks AS (
|
2132 |
SELECT id as chunk_id
|
2133 |
FROM LIGHTRAG_DOC_CHUNKS
|
2134 |
+
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
|
2135 |
)
|
2136 |
SELECT source_id as src_id, target_id as tgt_id
|
2137 |
FROM (
|
|
|
2140 |
JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
|
2141 |
WHERE r.workspace=$1
|
2142 |
) filtered
|
2143 |
+
WHERE distance>$3
|
2144 |
ORDER BY distance DESC
|
2145 |
+
LIMIT $4
|
2146 |
""",
|
2147 |
"entities": """
|
2148 |
WITH relevant_chunks AS (
|
2149 |
SELECT id as chunk_id
|
2150 |
FROM LIGHTRAG_DOC_CHUNKS
|
2151 |
+
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
|
2152 |
)
|
2153 |
SELECT entity_name FROM
|
2154 |
(
|
|
|
2157 |
JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
|
2158 |
WHERE e.workspace=$1
|
2159 |
) as chunk_distances
|
2160 |
+
WHERE distance>$3
|
2161 |
ORDER BY distance DESC
|
2162 |
+
LIMIT $4
|
2163 |
""",
|
2164 |
"chunks": """
|
2165 |
WITH relevant_chunks AS (
|
2166 |
SELECT id as chunk_id
|
2167 |
FROM LIGHTRAG_DOC_CHUNKS
|
2168 |
+
WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
|
2169 |
)
|
2170 |
SELECT id, content, file_path FROM
|
2171 |
(
|
2172 |
SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
|
2173 |
FROM LIGHTRAG_DOC_CHUNKS
|
2174 |
+
WHERE workspace=$1
|
2175 |
AND id IN (SELECT chunk_id FROM relevant_chunks)
|
2176 |
) as chunk_distances
|
2177 |
+
WHERE distance>$3
|
2178 |
ORDER BY distance DESC
|
2179 |
+
LIMIT $4
|
2180 |
""",
|
2181 |
# DROP tables
|
2182 |
"drop_specifiy_table_workspace": """
|