Spaces:
Runtime error
Runtime error
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext | |
from llama_index.vector_stores import PineconeVectorStore | |
from llama_index.storage.storage_context import StorageContext | |
import pinecone | |
import logging | |
import sys | |
import os | |
import openai | |
from pymongo.mongo_client import MongoClient | |
from datetime import datetime | |
# Route INFO-and-above log records to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# NOTE(review): basicConfig above already attaches a stdout handler to the
# root logger, so this extra handler makes every record print twice.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Service credentials are read from the environment; a missing variable
# raises KeyError as soon as the module is imported.
pinecone_key = os.environ["PINECONE_KEY"]
mongo_key = os.environ["MONGO_KEY"]
def add_file(collection, username, filename):
    """Insert a record describing one uploaded file into ``collection``.

    The record stores ``username`` under the ``"brain"`` key, ``filename``
    under ``"filename"``, a ``None`` ``"namespace"`` and the current UTC time
    (a naive ``datetime``) under ``"timestamp"``.

    Args:
        collection: Object exposing ``insert_one(document)``.
        username: Value stored in the record's ``"brain"`` field.
        filename: Value stored in the record's ``"filename"`` field.
    """
    record = dict(
        brain=username,
        filename=filename,
        namespace=None,
        timestamp=datetime.utcnow(),
    )
    collection.insert_one(record)
def delete_file(collection, username, filename):
    """Delete one record matching the given brain and file name.

    Args:
        collection: Object exposing ``delete_one(query)``.
        username: Value the record's ``"brain"`` field must equal.
        filename: Value the record's ``"filename"`` field must equal.
    """
    collection.delete_one({"brain": username, "filename": filename})
def _print_index_stats(pindex):
    """Print the Pinecone index statistics; on failure print the error instead."""
    try:
        print(pindex.describe_index_stats())
    except Exception as e:  # diagnostics only, never abort the update
        print(e)


def updateBrain(brainName, files):
    """Load the given files and (re-)index them into the brain's Pinecone index.

    Each file is read with ``SimpleDirectoryReader``; every document it yields
    gets the file's base name as ``doc_id`` and as ``extra_info["filename"]``.
    For every file that loaded, any previous vectors and the previous Mongo
    record with the same name are removed first, then the documents are
    inserted and one Mongo record per file is written via ``add_file``.

    Args:
        brainName: Name of the Pinecone index; also stored as the ``"brain"``
            field of the Mongo records.
        files: Iterable of objects whose ``.name`` attribute is a file path.
            ``None`` is treated as an empty list.

    Returns:
        A status string: a "please wait" message if any file is still empty,
        a message listing the files that could not be loaded or inserted, or
        ``"<brainName> Brain Updated!"`` when everything succeeded.
    """
    # Treat a missing upload list as empty instead of crashing on len(None).
    files = files or []
    print(len(files))
    print("Updating brain")
    print(brainName)
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")

    documents = []
    corrupt = []
    newfiles = []
    for count, file in enumerate(files, start=1):
        basename = os.path.basename(file.name)
        loader = SimpleDirectoryReader(input_files=[file.name])
        try:
            for doc in loader.load_data():
                # All documents from one file share the file's base name as
                # id, so they can later be removed together by that name.
                doc.doc_id = basename
                doc.extra_info = {"filename": basename}
                documents.append(doc)
            newfiles.append(basename)
        except Exception as e:
            print(e)
            # The reader reports this while the file has no content yet;
            # abort the whole update and ask the caller to retry.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(basename)
        print(count)

    pindex = pinecone.Index(brainName)
    _print_index_stats(pindex)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    # Built from an empty document list: this only wraps the existing Pinecone
    # index so documents can be deleted from and inserted into it.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context
    )

    client = MongoClient(mongo_key)
    try:
        collection = client['nbrain']['files']

        # Drop earlier versions of the files that are about to be re-added.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        uploaded = set()
        inserted = 0
        for doc in documents:
            try:
                index.insert(doc)
                # Write only one Mongo record per file, however many
                # documents the file produced.
                if doc.doc_id not in uploaded:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    uploaded.add(doc.doc_id)
                inserted += 1
                print(inserted)
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : " + str(e))
    finally:
        # Release the Mongo connection pool even if deletion/insertion raised.
        client.close()

    print("Brain Updated")
    _print_index_stats(pindex)
    print(corrupt)

    if corrupt:
        return (
            "Brain Updated!\n"
            "Below files are corrupt/unformatted, and not added to the brain.\n"
            + str(corrupt)
        )
    return brainName + " Brain Updated!"