"""Document processing utilities for pnp-chatbot-v1 (app/document_processor.py).

Builds FAISS vector stores from documents and persists/restores them via
Supabase storage.

History: FauziIsyrinApridal — "update penyimpanan vector_store ke supabase"
(commit 22ea197).
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os
import tempfile
import zipfile
import streamlit as st
def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
    """Save a FAISS vector store to Supabase storage as separate files.

    The store is first serialized locally (FAISS writes ``index.faiss`` and
    ``index.pkl``) into a temporary directory, then each existing file is
    uploaded to the bucket as ``<file_prefix>_index.faiss`` /
    ``<file_prefix>_index.pkl``.

    Args:
        vector_store: FAISS vector store exposing ``save_local(path)``.
        supabase: Supabase client exposing ``.storage.from_(bucket).upload(...)``.
        bucket_name: Target storage bucket name.
        file_prefix: Prefix prepended to the uploaded object names.

    Returns:
        True on success, False if any step raised (error is printed and
        shown via ``st.error``).
    """
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Serialize the store to disk first; FAISS emits the two index files.
            local_path = os.path.join(temp_dir, "vector_store")
            vector_store.save_local(local_path)

            # Upload both index files through one shared code path
            # (the original duplicated this block per file).
            for filename in ("index.faiss", "index.pkl"):
                source_file = os.path.join(local_path, filename)
                if os.path.exists(source_file):
                    remote_name = f"{file_prefix}_{filename}"
                    with open(source_file, 'rb') as f:
                        # supabase-py expects file options as strings,
                        # hence "true" rather than a boolean.
                        supabase.storage.from_(bucket_name).upload(
                            remote_name,
                            f,
                            {"upsert": "true"}
                        )
                    print(f"Uploaded: {remote_name}")

            print(f"Vector store uploaded to Supabase bucket: {bucket_name}")
            return True
    except Exception as e:
        print(f"Error uploading vector store to Supabase: {e}")
        st.error(f"Error uploading to Supabase: {e}")
        return False
def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_store"):
    """Load a FAISS vector store from Supabase storage.

    Downloads ``<file_prefix>_index.faiss`` and ``<file_prefix>_index.pkl``
    from the bucket into a temporary directory, then deserializes them with
    ``FAISS.load_local``.

    Args:
        supabase: Supabase client exposing ``.storage.from_(bucket).download(...)``.
        bucket_name: Source storage bucket name.
        file_prefix: Prefix of the stored object names.

    Returns:
        The loaded FAISS vector store, or None when a download or the
        deserialization fails.
    """
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(local_path, exist_ok=True)

            # Fetch both index files; abort early if either is unavailable
            # (the original duplicated this block per file).
            for filename in ("index.faiss", "index.pkl"):
                remote_name = f"{file_prefix}_{filename}"
                try:
                    payload = supabase.storage.from_(bucket_name).download(remote_name)
                    with open(os.path.join(local_path, filename), 'wb') as f:
                        f.write(payload)
                    print(f"Downloaded: {remote_name}")
                except Exception as e:
                    print(f"Error downloading {filename}: {e}")
                    return None

            # Must match the embedding configuration used at indexing time
            # (see process_documents) or similarity search degrades.
            embeddings = HuggingFaceEmbeddings(
                model_name="LazarusNLP/all-indo-e5-small-v4",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True}
            )
            # NOTE(security): index.pkl is unpickled here; only safe because
            # the bucket contents are written by this app, not by end users.
            vector_store = FAISS.load_local(
                local_path,
                embeddings,
                allow_dangerous_deserialization=True
            )
            print(f"Vector store loaded from Supabase bucket: {bucket_name}")
            return vector_store
    except Exception as e:
        print(f"Error loading vector store from Supabase: {e}")
        st.error(f"Error loading from Supabase: {e}")
        return None
def process_documents(docs):
    """Split documents into overlapping chunks and index them in FAISS.

    Chunks the input with 1500-character windows and 300-character overlap,
    embeds each chunk with LazarusNLP/all-indo-e5-small-v4 (CPU, normalized
    embeddings), and returns the resulting FAISS vector store.
    """
    # Chunking first: splitting is independent of the embedding model.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
    )
    chunks = splitter.split_documents(docs)

    embedder = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    return FAISS.from_documents(chunks, embedder)