# Spaces: Sleeping
from typing import List | |
from pinecone import Pinecone, ServerlessSpec | |
from llama_index.vector_stores.pinecone import PineconeVectorStore | |
from dotenv import load_dotenv | |
from llama_index.core import ( | |
SimpleDirectoryReader, | |
Document, | |
VectorStoreIndex, | |
StorageContext, | |
) | |
from huggingface_hub import HfFileSystem, HfApi | |
import os | |
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Pinecone Vector Database client; the API key comes from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the Pinecone index this module works with.
# Alternative kept for reference: pc_index_name = "openai-embeddings"
pc_index_name = "llama-integration-pinecone"

# Index listing fetched once, at import time; index_exists() searches it.
pc_indexes = pc.list_indexes()
# Check if the index already exists | |
def index_exists(index_name, indexes=None):
    """Return True if an index named *index_name* appears in an index listing.

    Args:
        index_name: Name of the Pinecone index to look for.
        indexes: Optional iterable of index descriptions, each supporting
            ``item["name"]``. When omitted, the module-level ``pc_indexes``
            listing (fetched once at import time) is searched; pass a fresh
            ``pc.list_indexes()`` result to avoid relying on that snapshot.

    Returns:
        bool: True when some entry's ``"name"`` equals *index_name*,
        otherwise False (including for an empty listing).
    """
    if indexes is None:
        # Default to the import-time listing, as the function always did.
        indexes = pc_indexes
    return any(index["name"] == index_name for index in indexes)
# Make sure the target index is present before opening a handle to it.
if not index_exists(pc_index_name):
    pc.create_index(
        name=pc_index_name,
        dimension=1536,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle to the index, shared by the helpers defined further down.
pinecone_index = pc.Index(pc_index_name)

# Maintenance snippets, intentionally left disabled (they delete vectors):
# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))

# Directory prefix for the local files this module reads and uploads.
SAVE_DIR = "uploaded_files"
def _namespace_exists(namespace: str):
    """Report whether *namespace* is listed in the Pinecone index statistics.

    Fetches ``pinecone_index.describe_index_stats()`` and tests whether
    *namespace* is a member of its ``"namespaces"`` entry.
    """
    return namespace in pinecone_index.describe_index_stats()["namespaces"]
def get_pinecone_index(filename: str) -> VectorStoreIndex:
    """Return a ``VectorStoreIndex`` for *filename* backed by Pinecone.

    The Pinecone namespace is derived from *filename* by replacing every
    "." and " " with "_". If that namespace is already present in the index,
    the returned index is attached to the existing vector store; otherwise
    the file is read from ``SAVE_DIR`` and a new index is built from its
    documents into that namespace.

    Args:
        filename: Name of a file located under ``SAVE_DIR``.

    Returns:
        The index attached to, or newly built in, the file's namespace.
    """
    namespace = filename.replace(".", "_").replace(" ", "_")
    store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=namespace)

    # Namespace already present: attach to it without reading the file.
    if _namespace_exists(namespace=namespace):
        print(f"Namespace {namespace} exists.")
        return VectorStoreIndex.from_vector_store(vector_store=store)

    # Namespace absent: load the file and index its documents into it.
    loaded_docs = SimpleDirectoryReader(
        input_files=[f"{SAVE_DIR}/{filename}"]
    ).load_data(show_progress=True)
    return VectorStoreIndex.from_documents(
        documents=loaded_docs,
        show_progress=True,
        storage_context=StorageContext.from_defaults(vector_store=store),
    )
# Hugging Face Hub client; the token is read from the HF_TOKEN variable.
api = HfApi(token=os.getenv("HF_TOKEN"))

# Runs at import time: uploads the local calregs.pdf to the same relative
# path inside the repository.
# NOTE(review): no repo_type is passed here; confirm the library default
# matches the kind of repository "hbui/RegBot4.0" actually is.
api.upload_file(
    path_or_fileobj=f"{SAVE_DIR}/calregs.pdf",
    path_in_repo=f"{SAVE_DIR}/calregs.pdf",
    repo_id="hbui/RegBot4.0",
)