robkaandorp committed on
Commit
6ea7f47
1 Parent(s): 0c9e1ab

Add train_dataset.py

Browse files
Files changed (1) hide show
  1. train_dataset.py +17 -0
train_dataset.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Inspect the Chroma collection persisted in ``./chroma_db``.

Prints how many documents the collection holds, followed by the text of
up to 10 stored documents.
"""
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load Chroma
db = Chroma(embedding_function=embedding_function, persist_directory="./chroma_db")

# NOTE(review): the LangChain wrapper does not appear to expose a public
# count method, so this still goes through the underlying collection.
print(f"There are {db._collection.count()} docs in the collection")

# Only the document texts are printed below, so request just that field
# through the wrapper's public ``get`` instead of the private collection.
docs = db.get(limit=10, include=["documents"])

for doc in docs["documents"]:
    print(doc)