"""Ingest a PDF into a MongoDB Atlas Vector Search collection.

The script reads ``MONGO_URI`` and ``OPENAI_API_KEY`` from the environment,
loads a PDF from a URL, splits it into chunks, embeds each chunk with
``OpenAIEmbeddings`` and inserts the result into the configured collection
through ``MongoDBAtlasVectorSearch.from_documents``.
"""
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
# error since Jan 2024, from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFLoader
# error since Jan 2024, from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
# error since Jan 2024, from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from pymongo import MongoClient

mongo_uri = os.getenv("MONGO_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast, before any download or embedding request is made: an unset
# MONGO_URI would otherwise make MongoClient fall back to its default host
# instead of the intended Atlas cluster.
missing = [
    name
    for name, value in (
        ("MONGO_URI", mongo_uri),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not value
]
if missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing)
    )

client = MongoClient(mongo_uri)
dbName = "langchain_demo"
collectionName = "collection_of_text_blobs"
collection = client[dbName][collectionName]

# Source document. An earlier variant of this script loaded local text files
# instead: DirectoryLoader('./sample_files', glob="./*.txt", show_progress=True)
# (DirectoryLoader is not imported here).
loader = PyPDFLoader("https://arxiv.org/pdf/2303.08774.pdf")
data = loader.load()

# Split the loaded documents into chunks (chunk_size=500, no overlap) so that
# each chunk is embedded and stored as its own document.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(data)

# Insert the documents in MongoDB Atlas Vector Search.
# The split ``docs`` are stored, not the unsplit ``data`` as in an earlier
# variant of this script.
# NOTE(review): presumably a vector search index named "default" has to exist
# on the collection for later queries -- confirm against the Atlas setup.
# NOTE(review): disallowed_special=() is kept from the original; presumably it
# stops tokenization from rejecting special-token text found in the PDF --
# confirm.
x = MongoDBAtlasVectorSearch.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(
        openai_api_key=openai_api_key,
        disallowed_special=(),
    ),
    collection=collection,
    index_name="default",
)