File size: 3,437 Bytes
9ca7a83
46540ed
 
991b663
 
 
 
46540ed
05f16d4
 
991b663
 
 
 
 
 
05f16d4
991b663
2ce8bfd
05f16d4
 
 
71f6dab
05f16d4
 
 
 
 
2ce8bfd
05f16d4
 
 
 
 
991b663
 
 
 
 
46540ed
991b663
 
46540ed
991b663
 
 
 
8c1b5fc
991b663
 
 
 
 
46540ed
 
 
 
 
991b663
 
 
 
 
 
 
 
36d34c2
991b663
 
 
 
 
 
a7890f5
46540ed
 
 
8c1b5fc
05f16d4
 
 
 
8c1b5fc
 
2ce8bfd
05f16d4
 
991b663
 
 
 
05f16d4
5a5ba11
2ce8bfd
05f16d4
5a5ba11
 
991b663
8c1b5fc
 
991b663
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.storage_context import StorageContext
import pinecone
import logging
import sys
import os
import openai
from pymongo.mongo_client import MongoClient
from datetime import datetime

# Send root-logger output at INFO level and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig already installs a stdout handler on the root
# logger, so this second handler looks like it makes every record print
# twice -- confirm whether the duplication is intended.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


# Credentials are read once at import time; a missing variable raises KeyError.
# pinecone_key is passed as ``api_key`` to ``pinecone.init`` in updateBrain.
pinecone_key = os.environ['PINECONE_KEY']
# mongo_key is passed directly to ``MongoClient(...)`` in updateBrain.
mongo_key=os.environ["MONGO_KEY"]

def add_file(collection, username, filename):
    """Insert a record describing one uploaded file into ``collection``.

    Args:
        collection: Object exposing ``insert_one(document)``; updateBrain
            passes a pymongo collection.
        username: Value stored under the ``"brain"`` key.
        filename: Value stored under the ``"filename"`` key.

    Returns:
        None. The result of ``insert_one`` is discarded.
    """
    document = {
        "brain": username,
        "filename": filename,
        # Always written as None by this function. The comma after this
        # entry was missing, which made the whole module a SyntaxError.
        "namespace": None,
        # Naive datetime holding the current UTC time.
        "timestamp": datetime.utcnow(),
    }
    collection.insert_one(document)


def delete_file(collection, username, filename):
    """Delete one record matching the given brain and filename.

    Args:
        collection: Object exposing ``delete_one(query)``; updateBrain
            passes a pymongo collection.
        username: Value matched against the ``"brain"`` key.
        filename: Value matched against the ``"filename"`` key.

    Returns:
        None. The result of ``delete_one`` is discarded.
    """
    collection.delete_one({"brain": username, "filename": filename})

def updateBrain(brainName, files):
    """Load ``files`` and (re-)index them into the Pinecone index ``brainName``.

    Each file is read with ``SimpleDirectoryReader``; every resulting document
    gets the file's base name as both its ``doc_id`` and its ``"filename"``
    metadata. For every file that loaded, the previous entries under that
    base name are removed from the vector index and from the ``nbrain.files``
    MongoDB collection, then the fresh documents are inserted and one record
    per file is written back to MongoDB.

    Args:
        brainName: Name of the Pinecone index; also stored as the ``"brain"``
            value of the MongoDB records.
        files: Sized iterable of objects exposing a ``.name`` file path.

    Returns:
        A status string: a "please wait" message if any file raised
        "Cannot read an empty file" while loading (nothing is modified in
        that case), a message listing the files that failed to load or
        insert, or ``"<brainName> Brain Updated!"`` when everything succeeded.
    """
    print(len(files))
    print("Updating brain")
    print(brainName)

    pinecone.init(api_key=pinecone_key,
                  environment="us-west4-gcp")

    documents = []
    corrupt = []
    newfiles = []

    for count, file in enumerate(files, start=1):
        basename = os.path.basename(file.name)
        try:
            # The reader is built inside the ``try`` so that a failure while
            # constructing it is reported as a corrupt file instead of
            # aborting the whole batch.
            loaded = SimpleDirectoryReader(input_files=[file.name]).load_data()
            for doc in loaded:
                # All documents of one file share the same id, which is what
                # lets ``delete_ref_doc(basename)`` below remove them together.
                doc.doc_id = basename
                doc.extra_info = {"filename": basename}
                documents.append(doc)
            newfiles.append(basename)
        except Exception as e:
            print(e)
            # An empty file is treated as "upload still in progress": bail
            # out before touching the index or the database.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(basename)
        print(count)

    pindex = pinecone.Index(brainName)

    # Stats are diagnostic output only; failing to fetch them is not fatal.
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    vector_store = PineconeVectorStore(pinecone_index=pindex)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)
    # Built from an empty document list: this wraps the existing Pinecone
    # index so documents can be deleted and inserted individually below.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context)

    client = MongoClient(mongo_key)
    try:
        collection = client['nbrain']['files']

        # Drop the previous version of every file that loaded successfully.
        for prevfile in newfiles:
            index.delete_ref_doc(prevfile, delete_from_docstore=True)
            delete_file(collection, brainName, prevfile)

        uploaded_files = set()
        for doc in documents:
            try:
                index.insert(doc)
                # A file may yield several documents; record it only once.
                if doc.doc_id not in uploaded_files:
                    print(doc.doc_id)
                    add_file(collection, brainName, doc.doc_id)
                    uploaded_files.add(doc.doc_id)
                    print(len(uploaded_files))
            except Exception as e:
                if doc.doc_id not in corrupt:
                    corrupt.append(doc.doc_id)
                print("ERROR : "+str(e))
    finally:
        # Release the MongoDB connection even if deletion or insertion fails.
        client.close()

    print("Brain Updated")
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        print(e)

    print(corrupt)

    if corrupt:
        # Whitespace is spelled out explicitly to keep the message identical
        # to the original triple-quoted literal.
        return (
            "Brain Updated! \n"
            "        Below files are corrupt/unformatted, and not added to the brain.\n"
            "         "
        ) + str(corrupt)

    return brainName+" Brain Updated!"