import os
import tempfile

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
    """Save vector store to Supabase storage as separate files."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save vector store locally first
            local_path = os.path.join(temp_dir, "vector_store")
            vector_store.save_local(local_path)
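            # FAISS.save_local writes two files into this directory:
            # index.faiss (the raw index) and index.pkl (the pickled
            # docstore and index-to-docstore id mapping)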
            
            # Upload index.faiss
            faiss_file = os.path.join(local_path, "index.faiss")
            if os.path.exists(faiss_file):
                with open(faiss_file, 'rb') as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.faiss", 
                        f, 
                        {"upsert": "true"}
                    )
                print(f"Uploaded: {file_prefix}_index.faiss")
            
            # Upload index.pkl
            pkl_file = os.path.join(local_path, "index.pkl")
            if os.path.exists(pkl_file):
                with open(pkl_file, 'rb') as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.pkl", 
                        f, 
                        {"upsert": "true"}
                    )
                print(f"Uploaded: {file_prefix}_index.pkl")
            
            print(f"Vector store uploaded to Supabase bucket: {bucket_name}")
            return True
            
    except Exception as e:
        print(f"Error uploading vector store to Supabase: {e}")
        st.error(f"Error uploading to Supabase: {e}")
        return False

def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_store"):
    """Load vector store from Supabase storage from separate files."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(local_path, exist_ok=True)
            
            # Download index.faiss
            try:
                faiss_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.faiss")
                faiss_file = os.path.join(local_path, "index.faiss")
                with open(faiss_file, 'wb') as f:
                    f.write(faiss_response)
                print(f"Downloaded: {file_prefix}_index.faiss")
            except Exception as e:
                print(f"Error downloading index.faiss: {e}")
                return None
            
            # Download index.pkl
            try:
                pkl_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.pkl")
                pkl_file = os.path.join(local_path, "index.pkl")
                with open(pkl_file, 'wb') as f:
                    f.write(pkl_response)
                print(f"Downloaded: {file_prefix}_index.pkl")
            except Exception as e:
                print(f"Error downloading index.pkl: {e}")
                return None
            
            # Load vector store
            embeddings = HuggingFaceEmbeddings(
                model_name="LazarusNLP/all-indo-e5-small-v4",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True}
            )
            
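            # index.pkl is a pickle file, so FAISS.load_local requires this
            # opt-in flag; acceptable here because we only load indexes that
            # this app uploaded itself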
            vector_store = FAISS.load_local(
                local_path, 
                embeddings, 
                allow_dangerous_deserialization=True
            )
            
            print(f"Vector store loaded from Supabase bucket: {bucket_name}")
            return vector_store
            
    except Exception as e:
        print(f"Error loading vector store from Supabase: {e}")
        st.error(f"Error loading from Supabase: {e}")
        return None

def process_documents(docs):
    """Split documents into overlapping chunks and index them in a FAISS vector store."""
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True}
    )

    # The 300-character overlap preserves context that would otherwise
    # be lost at chunk boundaries
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300
    )
    text_chunks = text_splitter.split_documents(docs)
    vector_store = FAISS.from_documents(text_chunks, embeddings)

    return vector_store
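

# Minimal usage sketch for wiring these helpers together. The env var
# names and the "vectors" bucket are illustrative assumptions, not part
# of this module; only create_client comes from the supabase package.
#
#   from supabase import create_client
#
#   supabase = create_client(
#       os.environ["SUPABASE_URL"],
#       os.environ["SUPABASE_KEY"],
#   )
#   vector_store = process_documents(docs)  # docs: list of LangChain Documents
#   save_vector_store_to_supabase(vector_store, supabase, "vectors")
#
#   restored = load_vector_store_from_supabase(supabase, "vectors")
#   if restored is not None:
#       results = restored.similarity_search("query text", k=4)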