robkaandorp
/

goingnowhere

Model card Files Files and versions Community

robkaandorp committed on Apr 15

Commit

6ea7f47

•

1 Parent(s): 0c9e1ab

Add train_dataset.py

Files changed (1) hide show

train_dataset.py +17 -0

train_dataset.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from langchain_community.embeddings.sentence_transformer import (
+    SentenceTransformerEmbeddings,
+)
+from langchain_community.vectorstores import Chroma
+# create the open-source embedding function
+embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+# load Chroma
+db = Chroma(embedding_function=embedding_function, persist_directory="./chroma_db")
+print("There are", db._collection.count(), " docs in the collection")
+docs = db._collection.peek(10)
+for doc in docs['documents']:
+  print(doc)