Spaces:
Runtime error
Runtime error
sharjeel1477
committed on
Commit
•
46540ed
1
Parent(s):
b8bdfed
Update build.py
Browse files
build.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
-
from llama_index import
|
2 |
-
from
|
|
|
3 |
import pinecone
|
4 |
import logging
|
5 |
import sys
|
6 |
import os
|
7 |
-
|
8 |
|
9 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
10 |
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
@@ -17,22 +18,10 @@ def updateBrain(brainName, files):
|
|
17 |
print(len(files))
|
18 |
print("Updating brain")
|
19 |
print(brainName)
|
20 |
-
|
21 |
-
max_input_size = 4096
|
22 |
-
# set number of output tokens
|
23 |
-
num_outputs = 2000
|
24 |
-
# set maximum chunk overlap
|
25 |
-
max_chunk_overlap = 20
|
26 |
-
# set chunk size limit
|
27 |
-
chunk_size_limit = 512
|
28 |
-
|
29 |
pinecone.init(api_key=pinecone_key,
|
30 |
environment="us-west4-gcp")
|
31 |
-
|
32 |
-
llm_predictor = LLMPredictor(llm=OpenAI(
|
33 |
-
temperature=0.7, model_name="text-davinci-003", max_tokens=num_outputs))
|
34 |
-
prompt_helper = PromptHelper(
|
35 |
-
max_input_size, num_outputs, max_chunk_overlap)
|
36 |
|
37 |
service_context = ServiceContext.from_defaults(
|
38 |
prompt_helper=prompt_helper, chunk_size_limit=chunk_size_limit)
|
@@ -43,11 +32,13 @@ def updateBrain(brainName, files):
|
|
43 |
i = 0
|
44 |
for file in files:
|
45 |
loader = SimpleDirectoryReader(input_files=[file.name])
|
46 |
-
|
47 |
try:
|
48 |
document1 = loader.load_data()
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
except Exception as e:
|
52 |
print(e)
|
53 |
if (str(e) == "Cannot read an empty file"):
|
@@ -63,16 +54,18 @@ def updateBrain(brainName, files):
|
|
63 |
except Exception as e:
|
64 |
print(e)
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
j = 1
|
70 |
for doc in documents:
|
71 |
print(newfiles[j-1])
|
72 |
try:
|
73 |
index.insert(doc)
|
74 |
except Exception as e:
|
75 |
-
corrupt.append(
|
76 |
print("ERROR : "+str(e))
|
77 |
|
78 |
print(j)
|
|
|
1 |
+
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptHelper
|
2 |
+
from llama_index.vector_stores import PineconeVectorStore
|
3 |
+
from llama_index.storage.storage_context import StorageContext
|
4 |
import pinecone
|
5 |
import logging
|
6 |
import sys
|
7 |
import os
|
8 |
+
import openai
|
9 |
|
10 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
11 |
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
|
|
18 |
print(len(files))
|
19 |
print("Updating brain")
|
20 |
print(brainName)
|
21 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
pinecone.init(api_key=pinecone_key,
|
23 |
environment="us-west4-gcp")
|
24 |
+
|
|
|
|
|
|
|
|
|
25 |
|
26 |
service_context = ServiceContext.from_defaults(
|
27 |
prompt_helper=prompt_helper, chunk_size_limit=chunk_size_limit)
|
|
|
32 |
i = 0
|
33 |
for file in files:
|
34 |
loader = SimpleDirectoryReader(input_files=[file.name])
|
|
|
35 |
try:
|
36 |
document1 = loader.load_data()
|
37 |
+
for doc in document1:
|
38 |
+
doc.doc_id=os.path.basename(file.name)
|
39 |
+
doc.extra_info={"filename":os.path.basename(file.name)}
|
40 |
+
documents.append(doc)
|
41 |
+
newfiles.append(os.path.basename(file.name))
|
42 |
except Exception as e:
|
43 |
print(e)
|
44 |
if (str(e) == "Cannot read an empty file"):
|
|
|
54 |
except Exception as e:
|
55 |
print(e)
|
56 |
|
57 |
+
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
|
58 |
+
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
59 |
+
service_context = ServiceContext.from_defaults(chunk_size=512,chunk_overlap=20)
|
60 |
+
index = VectorStoreIndex.from_documents([], storage_context=storage_context,service_context=service_context)
|
61 |
+
|
62 |
j = 1
|
63 |
for doc in documents:
|
64 |
print(newfiles[j-1])
|
65 |
try:
|
66 |
index.insert(doc)
|
67 |
except Exception as e:
|
68 |
+
corrupt.append(newfiles[j-1])
|
69 |
print("ERROR : "+str(e))
|
70 |
|
71 |
print(j)
|