File size: 2,582 Bytes
d5fbedd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from llama_index import SimpleDirectoryReader, GPTPineconeIndex, LLMPredictor, PromptHelper, ServiceContext
from langchain import OpenAI
import pinecone
import logging
import sys
import os


# Send log records of level INFO and above to stdout via the root logger.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Attach an additional stdout handler to the root logger.
# NOTE(review): if basicConfig above already installed a stdout handler,
# every record will be printed twice -- confirm this is intended.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


# Pinecone API key, read once at import time; a missing PINECONE_KEY
# environment variable raises KeyError here.
pinecone_key = os.environ['PINECONE_KEY']


def _print_index_stats(pindex):
    """Print the Pinecone index statistics, printing the error if the call fails."""
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        # Stats are purely informational; a failure here must not abort the update.
        print(e)


def updateBrain(brainName, files):
    """Load the given files and insert their documents into a Pinecone index.

    Every file is read with ``SimpleDirectoryReader``; each document it
    yields is inserted into the Pinecone index named ``brainName``. Files
    that cannot be read, yield no document, or fail to insert are collected
    by base name and reported in the returned message.

    Args:
        brainName: Name of the Pinecone index to insert into.
        files: Sized iterable of objects exposing a ``.name`` attribute that
            holds a file path (presumably upload temp files -- verify against
            the caller). ``None`` is treated as "no files".

    Returns:
        A status string:
        * ``"Please Wait! Files are uploading, Try again Later!"`` as soon as
          a loader error whose message is exactly
          ``"Cannot read an empty file"`` is seen (nothing is inserted);
        * a "Brain Updated!" message listing the problematic file names when
          at least one file could not be loaded or inserted;
        * ``brainName + " Brain Updated!"`` otherwise.
    """
    if files is None:
        # No upload at all: behave like an empty batch instead of crashing on len().
        files = []

    print(len(files))
    print("Updating brain")
    print(brainName)
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 2000
    # set maximum chunk overlap
    max_chunk_overlap = 20
    # set chunk size limit
    chunk_size_limit = 512

    pinecone.init(api_key=pinecone_key,
                  environment="us-west4-gcp")
    # define LLM
    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=0.7, model_name="text-davinci-003", max_tokens=num_outputs))
    prompt_helper = PromptHelper(
        max_input_size, num_outputs, max_chunk_overlap)

    # Pass the configured predictor along; previously it was built but unused,
    # so the service context silently fell back to its default predictor.
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, prompt_helper=prompt_helper,
        chunk_size_limit=chunk_size_limit)

    loaded = []   # (source path, document) pairs, in load order
    corrupt = []  # base names of files that failed to load or insert

    for count, file in enumerate(files, start=1):
        path = file.name
        try:
            # Reader construction is inside the try so that a bad path is
            # reported as a problem file rather than aborting the whole batch.
            file_documents = SimpleDirectoryReader(
                input_files=[path]).load_data()
        except Exception as e:
            print(e)
            if str(e) == "Cannot read an empty file":
                # The file exists but has no content yet; ask the user to retry.
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(os.path.basename(path))
        else:
            if file_documents:
                # Keep every document the reader produced, not just the first.
                loaded.extend((path, doc) for doc in file_documents)
            else:
                print("No documents loaded from " + str(path))
                corrupt.append(os.path.basename(path))
        print(count)

    pindex = pinecone.Index(brainName)
    _print_index_stats(pindex)

    # Start from an empty document list so the wrapper attaches to the
    # existing Pinecone index; documents are then inserted one by one.
    index = GPTPineconeIndex.from_documents(
        [], pinecone_index=pindex, service_context=service_context)

    for count, (path, doc) in enumerate(loaded, start=1):
        print(path)
        try:
            index.insert(doc)
        except Exception as e:
            failed_name = os.path.basename(path)
            # A file may contribute several documents; report it only once.
            if failed_name not in corrupt:
                corrupt.append(failed_name)
            print("ERROR : "+str(e))
        print(count)

    print("Brain Updated")
    _print_index_stats(pindex)

    print(corrupt)

    if corrupt:
        # Spelled out with explicit escapes so the exact whitespace of the
        # user-facing message survives editors that trim trailing spaces.
        return ("Brain Updated! \n"
                "        Below files are corrupt/unformatted, and not added to the brain.\n"
                "         ") + str(corrupt)

    return brainName+" Brain Updated!"