patti-j committed on
Commit
7944524
1 Parent(s): 550b035

Create ingest_data.py

Browse files
Files changed (1) hide show
  1. ingest_data.py +29 -0
ingest_data.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Index a text file in a FAISS vector store and run one sample query.

Steps, in order:
  1. Load ``state_of_the_union.txt`` with ``UnstructuredFileLoader``.
  2. Split the loaded documents with ``RecursiveCharacterTextSplitter``
     (library default settings).
  3. Build a FAISS vector store from the chunks using ``OpenAIEmbeddings``.
  4. Run a similarity search for a hard-coded query and print the first hit.

NOTE(review): ``OpenAIEmbeddings()`` is constructed without arguments, so it
presumably reads its credentials from the environment -- confirm before running.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle

# Load Data
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Split text into chunks suitable for embedding
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

# Embed the chunks and load them into the vector store
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Sample query against the freshly built index.
# NOTE(review): the query text does not obviously relate to the input file
# name -- confirm that this is the intended query/document pairing.
query = "What is Sales Handoff?"
docs = vectorstore.similarity_search(query)

# The search results are consumed directly; building another index from them
# would only trigger additional embedding calls without being used.
if docs:
    print(docs[0].page_content)
else:
    # Avoid an IndexError when the search yields no results.
    print(f"No documents matched query: {query!r}")

# Save vectorstore (currently disabled)
# with open("vectorstore.pkl", "wb") as f:
#     pickle.dump(vectorstore, f)