Spaces:
Runtime error
Runtime error
import os | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.chains import RetrievalQA | |
from langchain.llms import OpenAI | |
from langchain.vectorstores import Pinecone | |
import pinecone | |
from dotenv import load_dotenv | |
from consts import INDEX_NAME | |
# Pull variables from a local .env file into the process environment before
# they are read below.
load_dotenv()

# Configure the Pinecone client once at import time. Both values are read from
# the environment; a missing variable is passed through as None.
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENVIRONMENT_REGION"),
)
def ingestDataFromPdfIntoPinecone(
    pdf_path=None,
    chunk_size=1000,
    chunk_overlap=30,
):
    """Load a PDF, split it into chunks, embed them and store them in Pinecone.

    Args:
        pdf_path: Path of the PDF to ingest. When omitted, the original
            hard-coded CV path is used so existing zero-argument callers
            behave exactly as before.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        None. The chunks are written to the Pinecone index named by
        ``INDEX_NAME``; progress is reported with ``print``.
    """
    if pdf_path is None:
        # Machine-specific fallback kept only for backward compatibility;
        # callers on any other machine should pass ``pdf_path`` explicitly.
        pdf_path = "/Users/anujmahajan/Desktop/Anuj Documents/Resume/PDF/Amazon/Anuj Mahajan - IUB MS CS - CV.pdf"

    print('Reading Data from PDF')
    loader = PyPDFLoader(file_path=pdf_path)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""],
    )
    documents = text_splitter.split_documents(documents=documents)
    print(f"Going to insert {len(documents)} to Pinecone")

    embeddings = OpenAIEmbeddings()
    # Embeds every chunk and upserts the vectors into the existing index.
    Pinecone.from_documents(documents, embeddings, index_name=INDEX_NAME)
    print("****** Added to Pinecone vectorstore vectors")
# Script entry point: run the ingestion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    ingestDataFromPdfIntoPinecone()