# NOTE(review): page-scrape residue ("Spaces: Sleeping") — not part of the script; confirm and remove.
"""Build a FAISS vector store from a fixed list of web pages and pickle it.

Running this script loads every page listed in ``urls``, splits the loaded
documents into chunks with a default ``RecursiveCharacterTextSplitter``,
embeds the chunks with ``OpenAIEmbeddings``, indexes them with FAISS and
writes the resulting store to ``vectorstore.pkl`` in the current working
directory.
"""
import pickle

from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Load Data
urls = [
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-4-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-6-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-7-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023",
]
loader = UnstructuredURLLoader(urls=urls)
raw_documents = loader.load()

# Split text
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)
if not documents:
    # Stop early with a clear message: if no text was extracted there is
    # nothing to embed or index, and no point in calling the embedding API.
    raise SystemExit(
        "No documents were loaded from the configured URLs; nothing to index."
    )

# Load Data to vectorstore
# NOTE(review): presumably OpenAIEmbeddings picks up the API key from the
# environment (OPENAI_API_KEY) — confirm before running.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Save vectorstore
# The store is serialized with pickle; whoever loads "vectorstore.pkl" must
# only unpickle files produced by this script or another trusted source.
with open("vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)