Spaces:
Runtime error
Runtime error
Zwea Htet
committed on
Commit
·
9fb0f7d
1
Parent(s):
0a665f4
integrated pinecone with llama index to store vector embeddings
Browse files- models/vector_database.py +41 -2
- pages/llama_custom_demo.py +6 -23
models/vector_database.py
CHANGED
@@ -1,6 +1,14 @@
|
|
|
|
1 |
from pinecone import Pinecone, ServerlessSpec
|
2 |
from llama_index.vector_stores.pinecone import PineconeVectorStore
|
3 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
import os
|
6 |
|
@@ -30,5 +38,36 @@ if not index_exists(pc_index_name):
|
|
30 |
# Initialize your index
|
31 |
pinecone_index = pc.Index(pc_index_name)
|
32 |
|
33 |
-
#
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
from pinecone import Pinecone, ServerlessSpec
|
3 |
from llama_index.vector_stores.pinecone import PineconeVectorStore
|
4 |
from dotenv import load_dotenv
|
5 |
+
from llama_index.core import (
|
6 |
+
SimpleDirectoryReader,
|
7 |
+
Document,
|
8 |
+
VectorStoreIndex,
|
9 |
+
StorageContext,
|
10 |
+
)
|
11 |
+
from huggingface_hub import HfFileSystem
|
12 |
|
13 |
import os
|
14 |
|
|
|
38 |
# Initialize your index
|
39 |
pinecone_index = pc.Index(pc_index_name)
|
40 |
|
41 |
+
# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
|
42 |
+
# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))
|
43 |
+
|
44 |
+
SAVE_DIR = "uploaded_files"
|
45 |
+
|
46 |
+
|
47 |
+
def _namespace_exists(namespace: str):
    """Report whether ``namespace`` appears in the Pinecone index statistics.

    The lookup is done against the ``"namespaces"`` entry returned by
    ``pinecone_index.describe_index_stats()``.
    """
    index_stats = pinecone_index.describe_index_stats()
    known_namespaces = index_stats["namespaces"]
    return namespace in known_namespaces
|
50 |
+
|
51 |
+
|
52 |
+
def get_pinecone_index(filename: str) -> VectorStoreIndex:
    """Return a Pinecone-backed ``VectorStoreIndex`` for ``filename``.

    The namespace is the filename with every "." and " " replaced by "_".
    When that namespace is already listed in the index statistics, the index
    is built directly from the existing vector store. Otherwise the file is
    read from ``SAVE_DIR`` and a new index is created from its documents
    through a storage context that wraps the same vector store.
    """
    namespace = filename.replace(".", "_").replace(" ", "_")
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace=namespace,
    )

    # Reuse the stored vectors when the namespace is already present.
    if _namespace_exists(namespace=namespace):
        print(f"Namespace {namespace} exists.")
        return VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # Otherwise load the uploaded file and index it into the vector store.
    loaded_docs = SimpleDirectoryReader(
        input_files=[f"{SAVE_DIR}/{filename}"]
    ).load_data(show_progress=True)
    context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        documents=loaded_docs, show_progress=True, storage_context=context
    )
|
pages/llama_custom_demo.py
CHANGED
@@ -5,11 +5,11 @@ from typing import List
|
|
5 |
|
6 |
# local imports
|
7 |
from models.llms import load_llm, integrated_llms
|
8 |
-
from models.embeddings import
|
9 |
from models.llamaCustom import LlamaCustom
|
10 |
-
from models.llamaCustomV2 import LlamaCustomV2
|
11 |
|
12 |
-
|
13 |
from utils.chatbox import show_previous_messages, show_chat_input
|
14 |
from utils.util import validate_openai_api_key
|
15 |
|
@@ -22,6 +22,7 @@ from llama_index.core import (
|
|
22 |
Settings,
|
23 |
load_index_from_storage,
|
24 |
)
|
|
|
25 |
from llama_index.core.memory import ChatMemoryBuffer
|
26 |
from llama_index.core.base.llms.types import ChatMessage
|
27 |
|
@@ -93,24 +94,6 @@ def get_index(
|
|
93 |
raise e
|
94 |
return index
|
95 |
|
96 |
-
|
97 |
-
# def get_pinecone_index(filename: str) -> VectorStoreIndex:
|
98 |
-
# """Thie function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
|
99 |
-
# reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
|
100 |
-
# docs = reader.load_data(show_progress=True)
|
101 |
-
# storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
|
102 |
-
# index = VectorStoreIndex.from_documents(
|
103 |
-
# documents=docs, show_progress=True, storage_context=storage_context
|
104 |
-
# )
|
105 |
-
|
106 |
-
# return index
|
107 |
-
|
108 |
-
|
109 |
-
def get_chroma_index(filename: str) -> VectorStoreIndex:
|
110 |
-
"""This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
|
111 |
-
pass
|
112 |
-
|
113 |
-
|
114 |
def check_api_key(model_name: str, source: str):
|
115 |
if source.startswith("openai"):
|
116 |
if not st.session_state.openai_api_key:
|
@@ -205,8 +188,8 @@ with tab1:
|
|
205 |
Settings.llm = llama_llm
|
206 |
|
207 |
st.write("Processing Data ...")
|
208 |
-
index = get_index(selected_file)
|
209 |
-
|
210 |
|
211 |
st.write("Finishing Up ...")
|
212 |
llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
|
|
|
5 |
|
6 |
# local imports
|
7 |
from models.llms import load_llm, integrated_llms
|
8 |
+
from models.embeddings import openai_embed_model
|
9 |
from models.llamaCustom import LlamaCustom
|
10 |
+
# from models.llamaCustomV2 import LlamaCustomV2
|
11 |
|
12 |
+
from models.vector_database import get_pinecone_index
|
13 |
from utils.chatbox import show_previous_messages, show_chat_input
|
14 |
from utils.util import validate_openai_api_key
|
15 |
|
|
|
22 |
Settings,
|
23 |
load_index_from_storage,
|
24 |
)
|
25 |
+
from llama_index.vector_stores.pinecone import PineconeVectorStore
|
26 |
from llama_index.core.memory import ChatMemoryBuffer
|
27 |
from llama_index.core.base.llms.types import ChatMessage
|
28 |
|
|
|
94 |
raise e
|
95 |
return index
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
def check_api_key(model_name: str, source: str):
|
98 |
if source.startswith("openai"):
|
99 |
if not st.session_state.openai_api_key:
|
|
|
188 |
Settings.llm = llama_llm
|
189 |
|
190 |
st.write("Processing Data ...")
|
191 |
+
# index = get_index(selected_file)
|
192 |
+
index = get_pinecone_index(selected_file)
|
193 |
|
194 |
st.write("Finishing Up ...")
|
195 |
llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
|