goingnowhere / train_dataset.py
robkaandorp's picture
Add train_dataset.py
6ea7f47
raw history blame
No virus
527 Bytes
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
# Inspect a persisted Chroma vector store: report how many documents it holds
# and print a small sample of their text.

# Create the open-source embedding function (sentence-transformers model).
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Load the Chroma store persisted under ./chroma_db.
db = Chroma(embedding_function=embedding_function, persist_directory="./chroma_db")

# NOTE(review): `_collection` is a private attribute of the LangChain Chroma
# wrapper; it is used here to reach the underlying collection's count()/peek().
# It may change between library versions -- confirm when upgrading.
print(f"There are {db._collection.count()} docs in the collection")

# Peek at up to 10 records and print the text of each one.
docs = db._collection.peek(10)
for doc in docs["documents"]:
    print(doc)