Update ingest_data.py
Browse files
- ingest_data.py +3 -3
ingest_data.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
-
from langchain.document_loaders import
|
3 |
from langchain.vectorstores.faiss import FAISS
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
5 |
import pickle
|
6 |
|
7 |
# Load Data
|
8 |
-
loader =
|
9 |
raw_documents = loader.load()
|
10 |
|
11 |
# Split text
|
@@ -20,4 +20,4 @@ vectorstore = FAISS.from_documents(documents, embeddings)
|
|
20 |
|
21 |
# Save vectorstore
|
22 |
with open("vectorstore.pkl", "wb") as f:
|
23 |
-
pickle.dump(vectorstore, f)
|
|
|
1 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
3 |
from langchain.vectorstores.faiss import FAISS
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
5 |
import pickle
|
6 |
|
7 |
# Load Data
|
8 |
+
loader = UnstructuredFileLoader("SMR4clean.txt")
|
9 |
raw_documents = loader.load()
|
10 |
|
11 |
# Split text
|
|
|
20 |
|
21 |
# Save vectorstore
|
22 |
with open("vectorstore.pkl", "wb") as f:
|
23 |
+
pickle.dump(vectorstore, f)
|