"""Index uploaded files into a Pinecone-backed "brain" and track them in MongoDB.

Each brain is a Pinecone index named after the brain. For every file that is
successfully indexed, one bookkeeping document is stored in the ``files``
collection of the ``nbrain`` MongoDB database.
"""
import logging
import os
import sys
from datetime import datetime, timezone

import openai
import pinecone
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import PineconeVectorStore
from pymongo.mongo_client import MongoClient

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE: basicConfig above already attaches a stdout handler to the root logger,
# so this second handler makes every record appear twice. Kept unchanged to
# preserve the module's existing output.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Both variables are required; a missing one raises KeyError at import time.
pinecone_key = os.environ['PINECONE_KEY']
mongo_key = os.environ["MONGO_KEY"]


def add_file(collection, username, filename):
    """Insert a bookkeeping record for ``filename`` under the brain ``username``.

    Args:
        collection: Collection object exposing ``insert_one``.
        username: Value stored in the record's ``brain`` field.
        filename: Value stored in the record's ``filename`` field.
    """
    document = {
        "brain": username,
        "filename": filename,
        "namespace": None,
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        "timestamp": datetime.now(timezone.utc),
    }
    collection.insert_one(document)


def delete_file(collection, username, filename):
    """Delete one record matching the given brain and filename.

    Args:
        collection: Collection object exposing ``delete_one``.
        username: Value matched against the record's ``brain`` field.
        filename: Value matched against the record's ``filename`` field.
    """
    query = {
        "brain": username,
        "filename": filename,
    }
    collection.delete_one(query)


def _print_index_stats(pindex):
    """Print the Pinecone index statistics, or the error if the call fails."""
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        # Stats are diagnostic only; a failure here must not abort the update.
        print(e)


def updateBrain(brainName, files):
    """Load ``files``, (re)index them into the brain and record them in MongoDB.

    Any previously indexed version of a file (matched by base filename) is
    removed from the index and from the ``files`` collection before the new
    documents are inserted.

    Args:
        brainName: Name of the Pinecone index; also stored as the ``brain``
            field of the MongoDB records.
        files: Sized iterable of objects whose ``name`` attribute is the path
            of the file to load.

    Returns:
        A status message: a "please wait" notice if a file is still empty,
        a message listing the files that could not be loaded or inserted, or
        ``brainName + " Brain Updated!"`` on full success.
    """
    print(len(files))
    print("Updating brain")
    print(brainName)
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")

    documents = []
    corrupt = []
    newfiles = []
    for count, file in enumerate(files, start=1):
        loader = SimpleDirectoryReader(input_files=[file.name])
        basename = os.path.basename(file.name)
        try:
            for doc in loader.load_data():
                # Every document of a file shares the file's base name as its
                # id, so the whole file can later be replaced in one go.
                doc.doc_id = basename
                doc.extra_info = {"filename": basename}
                documents.append(doc)
                # Record each file once, even if it produced several
                # documents, so the cleanup below is not repeated per document.
                if basename not in newfiles:
                    newfiles.append(basename)
        except Exception as e:
            print(e)
            # An empty file is treated as an upload still in progress:
            # abort the whole update and ask the caller to retry.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(basename)
        print(count)

    pindex = pinecone.Index(brainName)
    _print_index_stats(pindex)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    # Built from an empty document list: this only wraps the existing Pinecone
    # index; documents are inserted one by one below.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context
    )

    # The context manager closes the client (connection pool and monitor
    # threads) when the update finishes or fails; previously it was leaked on
    # every call.
    with MongoClient(mongo_key) as client:
        collection = client['nbrain']['files']

        # Drop any earlier version of the files about to be (re)inserted.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        uploaded = set()
        inserted = 0
        for doc in documents:
            try:
                index.insert(doc)
                # One MongoDB record per file, not per document.
                if doc.doc_id not in uploaded:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    uploaded.add(doc.doc_id)
                inserted += 1
                print(inserted)
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : " + str(e))

    print("Brain Updated")
    _print_index_stats(pindex)
    print(corrupt)
    if corrupt:
        return """Brain Updated! Below files are corrupt/unformatted, and not added to the brain. """ + str(corrupt)
    return brainName + " Brain Updated!"