goingnowhere / query_dataset.py
robkaandorp's picture
Add create_dataset.py
3b327ab
raw history blame
No virus
834 Bytes
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
# Smoke-test script: open the persisted Chroma store and run a few sample
# similarity searches against it, printing the top hits for each query.

# Build the open-source sentence-transformer embedding function that the
# vector store uses to embed each query string.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the Chroma store persisted under ./chroma_db with that embedding function.
db = Chroma(embedding_function=embedding_function, persist_directory="./chroma_db")

# NOTE(review): `_collection` is a private attribute of the LangChain Chroma
# wrapper; it is kept here because it is what the script already relies on to
# report the collection size.
print("There are", db._collection.count(), " docs in the collection")

# Sample questions used to exercise the store.
queries = [
    "Where is the Nowhere event?",
    "Give me some information about the toilets.",
    "What is consent?",
]

for query in queries:
    # Retrieve the documents most similar to the query (library-default count).
    docs = db.similarity_search(query)

    print(f"\n\nQuery: {query}")
    print(f"Results: {len(docs)}")

    # The store may return fewer than two hits (for example when the
    # collection is empty or holds a single document), so print only the
    # ranks that actually exist instead of indexing docs[0] / docs[1] blindly.
    # zip() stops at the shorter input, so at most two results are shown.
    for rank_label, doc in zip(("First", "Second"), docs):
        print(f"{rank_label} result: {doc.page_content}")