Upload-Docs / build.py
sharjeel1477's picture
Update build.py
facddd7
raw
history blame
3.44 kB
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.storage_context import StorageContext
import pinecone
import logging
import sys
import os
import openai
from pymongo.mongo_client import MongoClient
from datetime import datetime
# Route INFO-and-above log records to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig above normally attaches a stdout handler to the
# root logger already; this adds another one, so records are likely emitted
# twice — confirm that is intended.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# Credentials are read from the environment at import time; a missing
# variable raises KeyError here.
pinecone_key = os.environ['PINECONE_KEY']
# Passed as the sole argument to MongoClient(...) in updateBrain.
mongo_key=os.environ["MONGO_KEY"]
def add_file(collection, username, filename):
    """Insert a record describing one uploaded file into ``collection``.

    Args:
        collection: Object exposing ``insert_one(document)``; presumably a
            pymongo collection — confirm against the caller.
        username: Value stored under the ``"brain"`` key.
        filename: Value stored under the ``"filename"`` key.

    The record also carries ``"namespace": None`` and a UTC ``"timestamp"``.
    """
    # Local import: the module itself only imports ``datetime`` from datetime.
    from datetime import timezone

    collection.insert_one(
        {
            "brain": username,
            "filename": filename,
            "namespace": None,
            # Timezone-aware UTC instant; ``datetime.utcnow()`` is deprecated
            # and returns a naive value that is easy to misread as local time.
            "timestamp": datetime.now(timezone.utc),
        }
    )
def delete_file(collection, username, filename):
    """Delete a single record matching the given brain and file name.

    Args:
        collection: Object exposing ``delete_one(filter)``; presumably a
            pymongo collection — confirm against the caller.
        username: Value matched against the ``"brain"`` key.
        filename: Value matched against the ``"filename"`` key.
    """
    collection.delete_one({"brain": username, "filename": filename})
def updateBrain(brainName, files):
    """Load ``files`` and index them into the Pinecone index named ``brainName``.

    Each file is read with ``SimpleDirectoryReader``; every document it yields
    is tagged with the file's base name as both ``doc_id`` and the
    ``"filename"`` entry of ``extra_info``. For every file that loaded
    successfully, any previous copy is removed (from the vector index and from
    the ``nbrain.files`` Mongo collection) before the new documents are
    inserted and the file is recorded again.

    Args:
        brainName: Name of the Pinecone index; also stored as the ``"brain"``
            value of the Mongo file records.
        files: Sized iterable of objects with a ``.name`` path attribute
            (presumably uploaded temp files — confirm against the caller).

    Returns:
        A status string: a "please wait" message if any file reports
        ``"Cannot read an empty file"``, a message listing corrupt files if
        any failed to load or insert, otherwise ``"<brainName> Brain Updated!"``.
    """
    print(len(files))
    print("Updating brain")
    print(brainName)
    pinecone.init(api_key=pinecone_key,
                  environment="us-west4-gcp")

    documents = []
    corrupt = []
    newfiles = []
    for count, file in enumerate(files, start=1):
        base_name = os.path.basename(file.name)
        try:
            # Constructing the reader can itself raise (e.g. a path that no
            # longer exists), so it sits inside the try: one bad file is then
            # reported as corrupt instead of aborting the whole batch.
            loader = SimpleDirectoryReader(input_files=[file.name])
            for doc in loader.load_data():
                doc.doc_id = base_name
                doc.extra_info = {"filename": base_name}
                documents.append(doc)
            newfiles.append(base_name)
        except Exception as e:
            print(e)
            # This exact message is treated as "upload still in progress":
            # stop immediately and ask the caller to retry.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(base_name)
        print(count)

    pindex = pinecone.Index(brainName)
    # Stats are informational only; a failure here must not stop the update.
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    # Built from an empty document list: only a handle onto the existing
    # vector store is needed here; documents are inserted one by one below.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context)

    # The context manager closes the client on every exit path, so repeated
    # calls do not accumulate open connections.
    with MongoClient(mongo_key) as client:
        collection = client['nbrain']['files']

        # Drop earlier versions of the files about to be (re)inserted.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        uploaded = set()
        inserted = 0
        for doc in documents:
            try:
                index.insert(doc)
                # Several documents can share one doc_id (all documents from
                # a file get its base name); record the file only once.
                if doc.doc_id not in uploaded:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    uploaded.add(doc.doc_id)
                inserted += 1
                print(inserted)
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : "+str(e))

    print("Brain Updated")
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)
    print(corrupt)

    if corrupt:
        return (
            "Brain Updated!\n"
            "Below files are corrupt/unformatted, and not added to the brain.\n"
            + str(corrupt)
        )
    return brainName+" Brain Updated!"