Zwea Htet committed
Commit 9fb0f7d
1 parent: 0a665f4

integrated pinecone with llama index to store vector embeddings

models/vector_database.py CHANGED
@@ -1,6 +1,14 @@
+from typing import List
 from pinecone import Pinecone, ServerlessSpec
 from llama_index.vector_stores.pinecone import PineconeVectorStore
 from dotenv import load_dotenv
+from llama_index.core import (
+    SimpleDirectoryReader,
+    Document,
+    VectorStoreIndex,
+    StorageContext,
+)
+from huggingface_hub import HfFileSystem

 import os

@@ -30,5 +38,36 @@ if not index_exists(pc_index_name):
 # Initialize your index
 pinecone_index = pc.Index(pc_index_name)

-# Define the vector store
-pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
+# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))
+
+SAVE_DIR = "uploaded_files"
+
+
+def _namespace_exists(namespace: str):
+    namespaces = pinecone_index.describe_index_stats()["namespaces"]
+    return namespace in namespaces
+
+
+def get_pinecone_index(filename: str) -> VectorStoreIndex:
+    """This function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
+    namespace = filename.replace(".", "_").replace(" ", "_")
+    pinecone_vector_store = PineconeVectorStore(
+        pinecone_index=pinecone_index,
+        namespace=namespace,
+    )
+    index = None
+    if _namespace_exists(namespace=namespace):
+        print(f"Namespace {namespace} exists.")
+        index = VectorStoreIndex.from_vector_store(vector_store=pinecone_vector_store)
+    else:
+        reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
+        docs = reader.load_data(show_progress=True)
+        storage_context = StorageContext.from_defaults(
+            vector_store=pinecone_vector_store
+        )
+        index = VectorStoreIndex.from_documents(
+            documents=docs, show_progress=True, storage_context=storage_context
+        )
+
+    return index
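To make the load-or-build behavior above concrete, here is a minimal usage sketch. It is not part of the commit: it assumes PINECONE_API_KEY (and an embedding key such as OPENAI_API_KEY) are set, that the module's environment setup has already created the index, and that calregs.pdf has been saved under uploaded_files/; the index name "demo-index" and the stats shown in comments are illustrative.

import os
from pinecone import Pinecone
from models.vector_database import get_pinecone_index

# Hypothetical client setup mirroring the module's own; "demo-index" stands in
# for whatever index name the module reads from its environment.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
pinecone_index = pc.Index("demo-index")

# First call: namespace "calregs_pdf" does not exist yet, so the PDF is read
# with SimpleDirectoryReader, embedded, and upserted into Pinecone.
index = get_pinecone_index("calregs.pdf")

# describe_index_stats() now lists the namespace with its vector count, e.g.
# {"namespaces": {"calregs_pdf": {"vector_count": 1200}}, ...}
print(pinecone_index.describe_index_stats()["namespaces"])

# Second call: _namespace_exists() finds "calregs_pdf", so the index is
# rebuilt directly from the stored vectors without re-embedding anything.
index = get_pinecone_index("calregs.pdf")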
pages/llama_custom_demo.py CHANGED
@@ -5,11 +5,11 @@ from typing import List

 # local imports
 from models.llms import load_llm, integrated_llms
-from models.embeddings import hf_embed_model, openai_embed_model
+from models.embeddings import openai_embed_model
 from models.llamaCustom import LlamaCustom
-from models.llamaCustomV2 import LlamaCustomV2
+# from models.llamaCustomV2 import LlamaCustomV2

-# from models.vector_database import pinecone_vector_store
+from models.vector_database import get_pinecone_index
 from utils.chatbox import show_previous_messages, show_chat_input
 from utils.util import validate_openai_api_key

@@ -22,6 +22,7 @@ from llama_index.core import (
     Settings,
     load_index_from_storage,
 )
+from llama_index.vector_stores.pinecone import PineconeVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.base.llms.types import ChatMessage

@@ -93,24 +94,6 @@ def get_index(
         raise e
     return index

-
-# def get_pinecone_index(filename: str) -> VectorStoreIndex:
-#     """Thie function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
-#     reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
-#     docs = reader.load_data(show_progress=True)
-#     storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
-#     index = VectorStoreIndex.from_documents(
-#         documents=docs, show_progress=True, storage_context=storage_context
-#     )
-
-#     return index
-
-
-def get_chroma_index(filename: str) -> VectorStoreIndex:
-    """This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
-    pass
-
-
 def check_api_key(model_name: str, source: str):
     if source.startswith("openai"):
         if not st.session_state.openai_api_key:
@@ -205,8 +188,8 @@ with tab1:
         Settings.llm = llama_llm

         st.write("Processing Data ...")
-        index = get_index(selected_file)
-        # index = get_pinecone_index(selected_file)
+        # index = get_index(selected_file)
+        index = get_pinecone_index(selected_file)

         st.write("Finishing Up ...")
         llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
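Since the page passes the Pinecone-backed index to LlamaCustom exactly as it previously passed the local one, downstream querying is unchanged. Here is a hedged sketch of that consumption path; the filename, question strings, chat_mode, and token_limit are illustrative choices, not the page's actual settings.

from llama_index.core.memory import ChatMemoryBuffer
from models.vector_database import get_pinecone_index

# "calregs.pdf" stands in for whichever upload the user selected on the page.
index = get_pinecone_index("calregs.pdf")

# Plain question answering: retrieval now hits the Pinecone namespace instead
# of an index persisted on local disk.
query_engine = index.as_query_engine()
print(query_engine.query("What does this document cover?"))

# The page imports ChatMemoryBuffer, so a chat-style engine over the same
# index could be wired up like this.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(chat_mode="context", memory=memory)
print(chat_engine.chat("Summarize the key definitions."))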