Spaces:
Sleeping
Sleeping
| from sentence_transformers import SentenceTransformer | |
| import numpy as np | |
| import hashlib | |
| import chromadb | |
# Sentence-embedding model used by both the indexing and the search helper.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Chroma client backed by on-disk storage at the relative path "chroma_db".
client = chromadb.PersistentClient(path="chroma_db")

# Fetch the collection if it exists, otherwise create it; the metadata asks
# for cosine distance in the HNSW index.
collection = client.get_or_create_collection(
    name="test_collection",
    metadata={"hnsw:space": "cosine"},
)
| # Function to embed and store the sentences in the collection | |
def embed_and_store(sentences, collection):
    """Embed *sentences* and upsert them into *collection*.

    Each sentence is keyed by the SHA-256 hex digest of its UTF-8 bytes, so
    storing the same text again overwrites the existing record rather than
    adding a second one.

    Args:
        sentences: A string, or an iterable of strings, to embed and store.
        collection: Object exposing ``upsert(documents=..., ids=...,
            embeddings=...)``, e.g. a Chroma collection.

    Returns:
        A list with one ID per input sentence, in input order. Repeated
        sentences yield repeated IDs. Empty input returns ``[]`` without
        touching the model or the collection.
    """
    # A bare string would otherwise be iterated character by character.
    sentences = [sentences] if isinstance(sentences, str) else list(sentences)
    if not sentences:
        # Nothing to encode, and Chroma rejects an upsert with no IDs.
        return []

    ids = [hashlib.sha256(s.encode("utf-8")).hexdigest() for s in sentences]

    # Identical sentences share an ID and Chroma rejects a batch that repeats
    # an ID, so only the first occurrence of each sentence is embedded/sent.
    unique = dict(zip(ids, sentences))
    unique_ids = list(unique)
    unique_sentences = list(unique.values())

    # Plain nested lists are accepted by every Chroma version, unlike ndarrays.
    embeddings = np.asarray(model.encode(unique_sentences)).tolist()

    collection.upsert(
        documents=unique_sentences,
        ids=unique_ids,
        embeddings=embeddings,
    )
    return ids
| # Function to perform semantic search | |
def semantic_search(query, model=model, collection=collection, n_results=3):
    """Return the stored entries closest in meaning to *query*.

    Args:
        query: A query string, or an iterable of query strings.
        model: Encoder exposing ``encode(list_of_str)``; defaults to the
            module-level ``model``.
        collection: Object exposing ``query(query_embeddings=...,
            n_results=...)``; defaults to the module-level ``collection``.
        n_results: Number of matches requested per query.

    Returns:
        Whatever ``collection.query`` returns, unmodified.
    """
    queries = [query] if isinstance(query, str) else list(query)

    # Encoding a single string yields a 1-D vector; always send an explicit
    # batch (list of vectors) as plain lists so the call does not depend on
    # Chroma auto-wrapping a lone ndarray.
    query_embeddings = np.asarray(model.encode(queries)).tolist()

    return collection.query(
        query_embeddings=query_embeddings,
        n_results=n_results,
    )
# Demo corpus: Python and Docker questions plus one unrelated sentence.
sentences = [
    "How do I read a file in Python?",
    "Open a file using the open() function",
    "Python file I/O tutorial",
    "What is a for loop?",
    "Iterating over a list in Python",
    "How to use list comprehensions",
    "Docker container vs Docker image",
    "What is a Dockerfile?",
    "Sort a list in Python",
    "The sky is blue and the sun is yellow",
]

# Index the corpus, then print the three nearest matches for a sample query.
ids = embed_and_store(sentences, collection)

user_query = "reading and writing files in Python"
results = semantic_search(
    user_query,
    model=model,
    collection=collection,
    n_results=3,
)
print(results)