Update main.py
Browse files
main.py
CHANGED
@@ -15,11 +15,15 @@ import json
|
|
15 |
import re
|
16 |
import random
|
17 |
import spacy
|
|
|
18 |
|
19 |
app = Flask(__name__)
|
20 |
|
21 |
global isServer
|
22 |
-
|
|
|
|
|
|
|
23 |
|
24 |
global baseFilePath
|
25 |
global jsonPath
|
@@ -203,16 +207,24 @@ def newFile(files, filepaths):
|
|
203 |
|
204 |
#redactedFile = baseFilePath + "redacted/" + redactedFile + ".txt"
|
205 |
redactedFile = baseFilePath + "redacted/" + redactedFile + ".txt"
|
206 |
-
|
|
|
207 |
loader = TextLoader(redactedFile, encoding='UTF-8')
|
|
|
208 |
documents = loader.load()
|
|
|
209 |
text_splitter = RecursiveCharacterTextSplitter(
|
210 |
chunk_size=300, chunk_overlap=0, separators=[" ", ",", "\n"]
|
211 |
)
|
|
|
212 |
texts = text_splitter.split_documents(documents)
|
|
|
213 |
print(texts)
|
|
|
214 |
chromaDirectory = baseFilePath + "chroma_db"
|
|
|
215 |
Chroma.from_documents(texts, embeddings, persist_directory=chromaDirectory)
|
|
|
216 |
print("Done processing: " + filepaths[count].split("/")[-1])
|
217 |
count = count + 1
|
218 |
|
|
|
15 |
import re
|
16 |
import random
|
17 |
import spacy
|
18 |
+
import platform
|
19 |
|
20 |
app = Flask(__name__)
|
21 |
|
22 |
global isServer
|
23 |
+
if platform.system() == "Darwin":
|
24 |
+
isServer = False
|
25 |
+
else:
|
26 |
+
isServer = True
|
27 |
|
28 |
global baseFilePath
|
29 |
global jsonPath
|
|
|
207 |
|
208 |
#redactedFile = baseFilePath + "redacted/" + redactedFile + ".txt"
|
209 |
redactedFile = baseFilePath + "redacted/" + redactedFile + ".txt"
|
210 |
+
|
211 |
+
print("1")
|
212 |
loader = TextLoader(redactedFile, encoding='UTF-8')
|
213 |
+
print("2")
|
214 |
documents = loader.load()
|
215 |
+
print("3")
|
216 |
text_splitter = RecursiveCharacterTextSplitter(
|
217 |
chunk_size=300, chunk_overlap=0, separators=[" ", ",", "\n"]
|
218 |
)
|
219 |
+
print("4")
|
220 |
texts = text_splitter.split_documents(documents)
|
221 |
+
print("5")
|
222 |
print(texts)
|
223 |
+
print("6")
|
224 |
chromaDirectory = baseFilePath + "chroma_db"
|
225 |
+
print("7")
|
226 |
Chroma.from_documents(texts, embeddings, persist_directory=chromaDirectory)
|
227 |
+
print("8")
|
228 |
print("Done processing: " + filepaths[count].split("/")[-1])
|
229 |
count = count + 1
|
230 |
|