ONE_OFF / build.py
sharjeel1477's picture
Duplicate from sharjeel1477/ONE_OFF_TEST
5c50619
raw
history blame contribute delete
No virus
2.49 kB
from llama_index import SimpleDirectoryReader, VectorStoreIndex, LLMPredictor, PromptHelper, ServiceContext,StorageContext
from langchain import OpenAI
import logging
import sys
import os
import json
# Route log records at INFO level and above to stdout.
# NOTE(review): basicConfig() installs its own stdout handler when the root
# logger has none, and the next line attaches a second one, so each record
# is then written to stdout twice. Kept as-is to preserve existing output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
def _record_corrupt(corrupt, path):
    """Append the base name of *path* to *corrupt* unless it is already listed."""
    name = os.path.basename(path)
    if name not in corrupt:
        corrupt.append(name)


def updateBrain(brainName, files):
    """Build a temporary vector index ("brain") from *files* and persist it.

    Every file is loaded with ``SimpleDirectoryReader``. Each document it
    yields is inserted into a fresh ``VectorStoreIndex`` (chunk size 600,
    chunk overlap 10), and the index is persisted to ``./<brainName>``
    relative to the current working directory.

    Args:
        brainName: Name of the brain; also used as the persist directory name.
        files: Sized iterable of objects exposing a ``.name`` attribute that
            holds the file's path (presumably upload temp-file wrappers --
            confirm against the caller).

    Returns:
        A human-readable status string, one of:

        * ``"Please Wait! Files are uploading, Try again Later!"`` if loading
          a file raises an exception whose message is exactly
          ``"Cannot read an empty file"``; nothing is persisted in that case.
        * A "Brain Created!" message followed by the list of problem file
          names, if any file failed to load or any document failed to insert.
        * ``"<brainName> Temp Brain Created!"`` otherwise.

    Note:
        A file is reported as corrupt as soon as one of its documents fails
        to insert; documents of that file that inserted successfully remain
        in the index.
    """
    print(len(files))
    print("Creating Temp brain")
    print(brainName)

    corrupt = []  # base names of files that failed to load or to index
    loaded = []   # (document, source path) pairs, in load order

    for count, file in enumerate(files, start=1):
        loader = SimpleDirectoryReader(input_files=[file.name])
        try:
            loaded.extend((doc, file.name) for doc in loader.load_data())
        except Exception as e:
            print(e)
            # This exact message is treated as "upload still in progress"
            # rather than as a corrupt file, so the whole build is aborted.
            if str(e) == "Cannot read an empty file":
                return "Please Wait! Files are uploading, Try again Later!"
            _record_corrupt(corrupt, file.name)
        print(count)

    storage_context = StorageContext.from_defaults()
    service_context = ServiceContext.from_defaults(chunk_size=600, chunk_overlap=10)
    # Start from an empty index and insert one document at a time so that a
    # single bad document does not abort the whole build.
    index = VectorStoreIndex.from_documents(
        [], storage_context=storage_context, service_context=service_context
    )

    for count, (doc, source_path) in enumerate(loaded, start=1):
        print(source_path)
        try:
            index.insert(doc)
        except Exception as e:
            # Several documents may come from the same file; list it once.
            _record_corrupt(corrupt, source_path)
            print("ERROR : " + str(e))
        print(count)

    index_name = "./" + brainName
    index.storage_context.persist(persist_dir=index_name)
    print(brainName + " Temp Brain Created!")

    # Diagnostic listing only: the index is already persisted, so a missing
    # "data" directory must not turn a successful build into an exception.
    if os.path.isdir("data"):
        for filename in os.listdir("data"):
            print(os.path.join("data", filename))
    print(corrupt)

    # Diagnostic listing of the sub-directories of the working directory.
    current_dir = os.getcwd()
    for entry in os.listdir(current_dir):
        if os.path.isdir(os.path.join(current_dir, entry)):
            print(entry)

    if corrupt:
        return (
            "Brain Created!\n"
            "Below files are corrupt/unformatted, and not added to the brain.\n"
            + str(corrupt)
        )
    return brainName + " Temp Brain Created!"