Spaces:
Sleeping
Sleeping
-- 1. Make the pgvector extension available in this database; it supplies the
--    VECTOR column type and the <=> distance operator used further down.
--    IF NOT EXISTS makes the statement safe to re-run, although a single
--    successful run per database is all that is required.
CREATE EXTENSION IF NOT EXISTS "vector";
-- 2. Table holding each document's text, its metadata and its embedding.
--    IF NOT EXISTS leaves an already-existing documents table untouched.
CREATE TABLE IF NOT EXISTS documents (
    -- Identity value is generated only when the INSERT does not supply one.
    id        BIGINT GENERATED BY DEFAULT AS IDENTITY,
    content   TEXT,
    metadata  JSONB,
    -- 768 dimensions, chosen to match the actual embedding data.
    embedding VECTOR(768),
    -- Same constraint name PostgreSQL would assign to an inline PRIMARY KEY.
    CONSTRAINT documents_pkey PRIMARY KEY (id)
);
-- 3. Create similarity search functions
-- 3a. Single-parameter overload of match_documents, the form that
--     SupabaseVectorStore calls by default.
--
--     Parameters:
--       query_embedding  768-dimension vector to compare every document against.
--     Returns:
--       Up to 5 rows (id, content, metadata, similarity), most similar first,
--       where similarity = 1 - cosine distance (<=>) between the stored
--       embedding and query_embedding.
CREATE OR REPLACE FUNCTION match_documents (
    query_embedding VECTOR(768)
)
RETURNS TABLE (
    id BIGINT,
    content TEXT,
    metadata JSONB,
    similarity FLOAT
)
LANGUAGE sql STABLE
AS $$
    SELECT
        documents.id,
        documents.content,
        documents.metadata,
        1 - (documents.embedding <=> query_embedding) AS similarity
    FROM documents
    -- Rows without an embedding have no similarity. Under the previous
    -- "ORDER BY similarity DESC" they sorted first (DESC defaults to
    -- NULLS FIRST) and could crowd real matches out of the result.
    WHERE documents.embedding IS NOT NULL
    -- Ascending cosine distance is the same ranking as descending similarity,
    -- but ordering directly by the distance operator is the form an
    -- approximate-nearest-neighbour index on documents.embedding can serve.
    ORDER BY documents.embedding <=> query_embedding
    LIMIT 5; -- Default limit of 5 results
$$;
-- 3b. Three-parameter overload of match_documents for callers that need
--     control over the cut-off and the number of results.
--
--     Parameters:
--       query_embedding  768-dimension vector to compare every document against.
--       match_threshold  Only rows whose similarity is strictly greater than
--                        this value are returned.
--       match_count      Maximum number of rows to return.
--     Returns:
--       Rows (id, content, metadata, similarity), most similar first, where
--       similarity = 1 - cosine distance (<=>) between the stored embedding
--       and query_embedding.
CREATE OR REPLACE FUNCTION match_documents (
    query_embedding VECTOR(768),
    match_threshold FLOAT,
    match_count INT
)
RETURNS TABLE (
    id BIGINT,
    content TEXT,
    metadata JSONB,
    similarity FLOAT
)
LANGUAGE sql STABLE
AS $$
    SELECT
        documents.id,
        documents.content,
        documents.metadata,
        1 - (documents.embedding <=> query_embedding) AS similarity
    FROM documents
    -- A NULL embedding makes this comparison NULL, so such rows are dropped here.
    WHERE 1 - (documents.embedding <=> query_embedding) > match_threshold
    -- Ascending cosine distance is the same ranking as descending similarity,
    -- but ordering directly by the distance operator is the form an
    -- approximate-nearest-neighbour index on documents.embedding can serve.
    ORDER BY documents.embedding <=> query_embedding
    LIMIT match_count;
$$;
-- 4. Index the embedding column to speed up similarity searches.
--    HNSW is generally recommended for its balance of speed and accuracy;
--    vector_cosine_ops pairs the index with the cosine-distance operator (<=>)
--    that match_documents uses.
CREATE INDEX IF NOT EXISTS documents_embedding_idx
    ON documents
    USING hnsw (embedding vector_cosine_ops);