|
from llama_index import SimpleDirectoryReader, GPTPineconeIndex, LLMPredictor, PromptHelper, ServiceContext |
|
from langchain import OpenAI |
|
import pinecone |
|
import logging |
|
import sys |
|
import os |
|
|
|
|
|
# Send log records at INFO level and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# NOTE(review): basicConfig() normally installs a stdout handler on the root
# logger already, so this extra handler likely makes every record print
# twice -- confirm this is intended before removing it.
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))


# Pinecone API key, read once at import time; a missing PINECONE_KEY
# environment variable raises KeyError here.
pinecone_key = os.environ["PINECONE_KEY"]
|
|
|
|
|
def _print_index_stats(pindex):
    """Print the Pinecone index statistics, printing the error if the call fails."""
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        # Stats are diagnostic only; a failure here must not abort the update.
        print(e)


def updateBrain(brainName, files):
    """Load the given files and insert them into the Pinecone index ``brainName``.

    Each file is read with ``SimpleDirectoryReader`` and the first document it
    yields is inserted into a ``GPTPineconeIndex`` backed by the Pinecone index
    named ``brainName``. A file that fails to load or to insert is recorded and
    reported in the returned message instead of aborting the whole update.

    Args:
        brainName: Name of the Pinecone index to update.
        files: Sized iterable of objects whose ``name`` attribute is the path
            of a file on disk (presumably upload handles -- confirm against
            the caller).

    Returns:
        A status string:
        * ``"Please Wait! Files are uploading, Try again Later!"`` as soon as
          a file fails to load with the message "Cannot read an empty file";
        * ``"Brain Updated!"`` followed by the list of failed file names when
          at least one file could not be loaded or inserted;
        * ``"<brainName> Brain Updated!"`` otherwise.
    """
    print(len(files))
    print("Updating brain")
    print(brainName)

    # Prompt and chunking limits handed to llama_index.
    max_input_size = 4096
    num_outputs = 2000
    max_chunk_overlap = 20
    chunk_size_limit = 512

    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")

    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=0.7, model_name="text-davinci-003", max_tokens=num_outputs))
    prompt_helper = PromptHelper(
        max_input_size, num_outputs, max_chunk_overlap)

    # The predictor was previously built and then dropped; pass it along so
    # the service context actually uses the configured model settings.
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
        chunk_size_limit=chunk_size_limit)

    loaded = []   # (file path, document) pairs that were read successfully
    corrupt = []  # base names of files that failed to load or insert

    for count, file in enumerate(files, start=1):
        loader = SimpleDirectoryReader(input_files=[file.name])
        try:
            # Only the first document produced for the file is kept; an empty
            # result raises IndexError and is handled like any load failure.
            loaded.append((file.name, loader.load_data()[0]))
        except Exception as e:
            print(e)
            # NOTE(review): an "empty file" error is treated as "upload still
            # in progress" and aborts the whole update -- presumably the file
            # has not been fully written yet; confirm against the uploader.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(os.path.basename(file.name))
        print(count)

    pindex = pinecone.Index(brainName)
    _print_index_stats(pindex)

    # Start from an empty document list and insert one document at a time so
    # that a single bad document does not abort the remaining inserts.
    index = GPTPineconeIndex.from_documents(
        [], pinecone_index=pindex, service_context=service_context)

    for count, (path, doc) in enumerate(loaded, start=1):
        print(path)
        try:
            index.insert(doc)
        except Exception as e:
            corrupt.append(os.path.basename(path))
            print("ERROR : " + str(e))
        print(count)

    print("Brain Updated")
    _print_index_stats(pindex)
    print(corrupt)

    if corrupt:
        return ("Brain Updated!\n"
                "Below files are corrupt/unformatted, and not added to the brain.\n"
                + str(corrupt))

    return brainName + " Brain Updated!"
|
|