# New-Main-Upload / build.py
# sharjeel1477's picture
# Duplicate from sharjeel1477/Druck-Upload
# d5fbedd
from llama_index import SimpleDirectoryReader, GPTPineconeIndex, LLMPredictor, PromptHelper, ServiceContext
from langchain import OpenAI
import pinecone
import logging
import sys
import os
# Configure the root logger to emit INFO-and-above records to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig attaches a stdout handler when the root logger has
# none, so this second stdout handler likely makes every record print twice --
# confirm that the duplication is intended.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
# Pinecone API key, read once at import time; importing this module raises
# KeyError if the PINECONE_KEY environment variable is not set.
pinecone_key = os.environ['PINECONE_KEY']
def _print_index_stats(pindex):
    """Print the statistics of a Pinecone index, or the error if the call fails."""
    try:
        print(pindex.describe_index_stats())
    except Exception as e:
        # Stats are diagnostic output only; a failure here must not abort the upload.
        print(e)


def updateBrain(brainName, files):
    """Load the given files and insert them into the Pinecone index ``brainName``.

    Each file is read with ``SimpleDirectoryReader``; files that cannot be read,
    or whose document cannot be inserted, are collected and reported in the
    returned message instead of raising.

    Args:
        brainName: Name of the Pinecone index to insert into.
        files: Iterable of objects exposing a ``name`` attribute holding the
            path of the file to load. ``None`` is treated as "no files".

    Returns:
        A human-readable status string:
        * ``"Please Wait! Files are uploading, Try again Later!"`` as soon as a
          file fails to load with the message "Cannot read an empty file"
          (nothing is inserted in that case);
        * ``"Brain Updated!..."`` followed by the list of base names of files
          that failed to load or insert, when there are any;
        * ``"<brainName> Brain Updated!"`` otherwise.
    """
    # Callers may pass None when nothing was selected; treat it as an empty upload.
    files = files or []

    print(len(files))
    print("Updating brain")
    print(brainName)

    # Prompt / chunking limits, in tokens.
    max_input_size = 4096
    num_outputs = 2000
    max_chunk_overlap = 20
    chunk_size_limit = 512

    pinecone.init(api_key=pinecone_key,
                  environment="us-west4-gcp")

    # Define the LLM and hand it to the service context so that the configured
    # model, temperature and output limit are the ones actually in effect.
    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=0.7, model_name="text-davinci-003", max_tokens=num_outputs))
    prompt_helper = PromptHelper(
        max_input_size, num_outputs, max_chunk_overlap)
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
        chunk_size_limit=chunk_size_limit)

    loaded = []   # (path, document) pairs that were read successfully
    corrupt = []  # base names of files that failed to load or to insert

    for count, file in enumerate(files, start=1):
        loader = SimpleDirectoryReader(input_files=[file.name])
        try:
            # Only the first document returned for the file is kept.
            # NOTE(review): if the reader can return several documents for one
            # file, the rest are dropped here -- confirm this is intended.
            loaded.append((file.name, loader.load_data()[0]))
        except Exception as e:
            print(e)
            # The reader reports this message for a zero-length file; the
            # caller is asked to retry rather than having the file marked corrupt.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            corrupt.append(os.path.basename(file.name))
        print(count)

    pindex = pinecone.Index(brainName)
    _print_index_stats(pindex)

    # Wrap the existing Pinecone index without adding any documents yet, so
    # that each document can be inserted (and fail) individually below.
    index = GPTPineconeIndex.from_documents(
        [], pinecone_index=pindex, service_context=service_context)

    for count, (path, doc) in enumerate(loaded, start=1):
        print(path)
        try:
            index.insert(doc)
        except Exception as e:
            corrupt.append(os.path.basename(path))
            print("ERROR : "+str(e))
        print(count)

    print("Brain Updated")
    _print_index_stats(pindex)

    print(corrupt)
    if corrupt:
        return ("Brain Updated!\n"
                "Below files are corrupt/unformatted, and not added to the brain.\n"
                + str(corrupt))
    return brainName+" Brain Updated!"