sharjeel1477 committed on
Commit 46540ed
1 Parent(s): b8bdfed

Update build.py

Files changed (1)
  1. build.py +17 -24
build.py CHANGED
@@ -1,10 +1,11 @@
-from llama_index import SimpleDirectoryReader, GPTPineconeIndex, LLMPredictor, PromptHelper, ServiceContext
-from langchain import OpenAI
+from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptHelper
+from llama_index.vector_stores import PineconeVectorStore
+from llama_index.storage.storage_context import StorageContext
 import pinecone
 import logging
 import sys
 import os
-
+import openai
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
@@ -17,22 +18,10 @@ def updateBrain(brainName, files):
     print(len(files))
     print("Updating brain")
     print(brainName)
-    # set maximum input size
-    max_input_size = 4096
-    # set number of output tokens
-    num_outputs = 2000
-    # set maximum chunk overlap
-    max_chunk_overlap = 20
-    # set chunk size limit
-    chunk_size_limit = 512
-
+
     pinecone.init(api_key=pinecone_key,
                   environment="us-west4-gcp")
-    # define LLM
-    llm_predictor = LLMPredictor(llm=OpenAI(
-        temperature=0.7, model_name="text-davinci-003", max_tokens=num_outputs))
-    prompt_helper = PromptHelper(
-        max_input_size, num_outputs, max_chunk_overlap)
+
 
     service_context = ServiceContext.from_defaults(
         prompt_helper=prompt_helper, chunk_size_limit=chunk_size_limit)
@@ -43,11 +32,13 @@ def updateBrain(brainName, files):
     i = 0
     for file in files:
         loader = SimpleDirectoryReader(input_files=[file.name])
-
         try:
             document1 = loader.load_data()
-            documents.append(document1[0])
-            newfiles.append(file.name)
+            for doc in document1:
+                doc.doc_id=os.path.basename(file.name)
+                doc.extra_info={"filename":os.path.basename(file.name)}
+                documents.append(doc)
+                newfiles.append(os.path.basename(file.name))
         except Exception as e:
             print(e)
             if (str(e) == "Cannot read an empty file"):
@@ -63,16 +54,18 @@ def updateBrain(brainName, files):
         except Exception as e:
             print(e)
 
-    index = GPTPineconeIndex.from_documents(
-        [], pinecone_index=pindex, service_context=service_context)
-
+    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    service_context = ServiceContext.from_defaults(chunk_size=512,chunk_overlap=20)
+    index = VectorStoreIndex.from_documents([], storage_context=storage_context,service_context=service_context)
+
     j = 1
     for doc in documents:
         print(newfiles[j-1])
         try:
             index.insert(doc)
         except Exception as e:
-            corrupt.append(os.path.basename(newfiles[j-1]))
+            corrupt.append(newfiles[j-1])
             print("ERROR : "+str(e))
 
         print(j)
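
For reference, the net effect of the commit is to replace the removed GPTPineconeIndex / LLMPredictor setup with a VectorStoreIndex backed by a PineconeVectorStore. Below is a minimal sketch of that new ingestion path end to end. It assumes a llama_index release from the same era as this commit (ServiceContext-based API) and a pinecone-client 2.x Index handle; the index name "brain-index", the file "example.pdf", and the environment variables are illustrative placeholders, not values taken from the repo.

import os
import pinecone
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.storage_context import StorageContext

# Placeholder credentials and index name -- not from the commit.
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="us-west4-gcp")
pinecone_index = pinecone.Index("brain-index")

# Wrap the Pinecone index so llama_index writes node embeddings into it.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Same chunking settings the commit switches to.
service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=20)

# Start from an empty index and insert documents one at a time,
# mirroring the loop in updateBrain().
index = VectorStoreIndex.from_documents(
    [], storage_context=storage_context, service_context=service_context)

documents = SimpleDirectoryReader(input_files=["example.pdf"]).load_data()
for doc in documents:
    doc.doc_id = os.path.basename("example.pdf")
    doc.extra_info = {"filename": os.path.basename("example.pdf")}
    index.insert(doc)  # embeds with the default OpenAI model; needs OPENAI_API_KEY set

Inserting into an empty VectorStoreIndex rather than passing the whole document list to from_documents lets each document fail independently, which is why the commit keeps the per-document try/except and the corrupt list.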