jamescg committed on
Commit
09a3c70
1 Parent(s): d75d351

Update ingest_data.py

Browse files
Files changed (1) hide show
  1. ingest_data.py +3 -3
ingest_data.py CHANGED
@@ -1,11 +1,11 @@
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter
2
- from langchain.document_loaders import PagedPDFSplitter
3
  from langchain.vectorstores.faiss import FAISS
4
  from langchain.embeddings import OpenAIEmbeddings
5
  import pickle
6
 
7
  # Load Data
8
- loader = PagedPDFSplitter("SMR4clean.txt")
9
  raw_documents = loader.load()
10
 
11
  # Split text
@@ -20,4 +20,4 @@ vectorstore = FAISS.from_documents(documents, embeddings)
20
 
21
  # Save vectorstore
22
  with open("vectorstore.pkl", "wb") as f:
23
- pickle.dump(vectorstore, f)
 
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain.document_loaders import UnstructuredFileLoader
3
  from langchain.vectorstores.faiss import FAISS
4
  from langchain.embeddings import OpenAIEmbeddings
5
  import pickle
6
 
7
  # Load Data
8
+ loader = UnstructuredFileLoader("SMR4clean.txt")
9
  raw_documents = loader.load()
10
 
11
  # Split text
 
20
 
21
  # Save vectorstore
22
  with open("vectorstore.pkl", "wb") as f:
23
+ pickle.dump(vectorstore, f)