Spaces:
Runtime error
Runtime error
# Build a retriever on Supabase:
#   1. Create the project, table, indexes, and functions.
#   2. Create a client with the project URL and key.
#   3. Insert the data together with its embeddings.
#
# Load metadata.jsonl
import json | |
import os | |
from dotenv import load_dotenv | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_community.vectorstores import SupabaseVectorStore | |
from supabase.client import Client, create_client | |
from langchain.schema import Document | |
# Load the metadata.jsonl file (one JSON value per line).
# The explicit UTF-8 encoding keeps the read independent of the platform's
# default locale encoding.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = list(jsonl_file)

# Parse every non-blank line. Blank lines (for example a trailing empty line
# at the end of the file) are skipped instead of being handed to json.loads,
# which would raise on them.
json_QA = [json.loads(json_str) for json_str in json_list if json_str.strip()]
### Build a vector database from metadata.jsonl
# https://python.langchain.com/docs/integrations/vectorstores/supabase/

# Load the .env settings first so the environment lookups below can see them.
load_dotenv()

# Embedding model used for every document (dim=768).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# The Supabase connection settings are read from the environment.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)
# Wrap each question/answer pair from metadata.jsonl into a row for the
# vector table: the text content, its metadata, and its embedding.
contents = [
    f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
    for sample in json_QA
]

# Embed all texts with one batched embed_documents call instead of one
# embed_query call per row; embed_documents is the embeddings method intended
# for texts that are being indexed. The guard avoids invoking the model on an
# empty list.
vectors = embeddings.embed_documents(contents) if contents else []

docs = [
    {
        "content": content,
        # Per the original note: the metadata must contain a "source" key,
        # otherwise an error is raised.
        "metadata": {
            "source": sample['task_id'],
        },
        "embedding": vector,
    }
    for sample, content, vector in zip(json_QA, contents, vectors)
]
# Target table: TABLE_NAME from the environment is used when it is set;
# otherwise the insert falls back to "documents", the table name this script
# previously hard-coded (the variable used to be read but never used).
table_name = os.environ.get('TABLE_NAME')

# Upload the documents to the vector database in a single insert request.
try:
    response = (
        supabase.table(table_name or "documents")
        .insert(docs)
        .execute()
    )
except Exception as exception:
    # Best-effort upload: report the failure and let the script finish.
    print("Error inserting data into Supabase:", exception)