# Note: on LangChain >= 0.2 these imports live under langchain_community /
# langchain_core (e.g. langchain_community.vectorstores.FAISS).
from datasets import load_dataset
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Load the first 10% of the KILT FEVER training split
dataset = load_dataset("facebook/kilt_tasks", "fever", split="train[:10%]")

# Convert each record's claim text into a LangChain Document, keeping the KILT id as metadata
documents = []
for item in dataset:
    text = item['input']
    documents.append(Document(page_content=text, metadata={"id": item['id']}))

# Embed the documents with a sentence-transformers model and build a FAISS index
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embeddings)

# Save FAISS index
db.save_local("vectorstore")
print("✅ Saved vectorstore!")