"""Build retrieval-augmented prompt context from a MongoDB vector search.

A query string is embedded with a SentenceTransformer model, the embedding is
used in a ``$vectorSearch`` aggregation against the ``EU_Cities`` database,
and the matching documents are formatted into a text prompt.
"""

from functools import lru_cache

from sentence_transformers import SentenceTransformer

from setup.db_setup import get_mongo_client, get_mongo_url

# Name of the SentenceTransformer model used to embed queries.
EMBEDDING_MODEL_NAME = "thenlper/gte-large"


@lru_cache(maxsize=1)
def _get_embedding_model() -> SentenceTransformer:
    """Return the embedding model, constructing it only on first use."""
    # Constructing the model is expensive, so it is cached for the lifetime
    # of the process instead of being rebuilt for every query.
    return SentenceTransformer(EMBEDDING_MODEL_NAME)


def get_embedding(text: str) -> list[float]:
    """Return the embedding of *text* as a list of floats.

    Args:
        text: The text to embed.

    Returns:
        The embedding vector, or an empty list when *text* is empty or
        contains only whitespace.
    """
    # Check for empty input first so the model is never loaded needlessly.
    if not text.strip():
        print("Attempted to get embedding for empty text.")
        return []

    embedding = _get_embedding_model().encode(text)
    return embedding.tolist()


def query_results(query, mongo_url, *, num_candidates: int = 150, limit: int = 5):
    """Run a vector search for *query* against ``EU_cities_collection``.

    Args:
        query: The query text to embed and search for.
        mongo_url: Connection URL passed to ``get_mongo_client``.
        num_candidates: Value for the ``numCandidates`` field of the
            ``$vectorSearch`` stage.
        limit: Value for the ``limit`` field of the ``$vectorSearch`` stage.

    Returns:
        An iterable of matching documents: the result of ``aggregate``, or an
        empty list when the query produced no embedding.
    """
    query_embedding = get_embedding(query)
    if not query_embedding:
        # An empty query yields no embedding; there is nothing to search
        # with, so skip the database round trip entirely.
        return []

    mongo_client = get_mongo_client(mongo_url)
    db = mongo_client["EU_Cities"]
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": limit,
            }
        }
    ]
    return db.EU_cities_collection.aggregate(pipeline)


def get_search_result(query, mongo_url) -> str:
    """Format the vector-search matches for *query* as text.

    Args:
        query: The query text to search for.
        mongo_url: Connection URL passed on to ``query_results``.

    Returns:
        One ``City: ..., Abstract: ...`` line per matching document, each
        terminated by a newline; an empty string when there are no matches.
        Missing ``city`` or ``combined`` fields are rendered as ``N/A``.
    """
    matches = query_results(query, mongo_url)
    return "".join(
        f"City: {match.get('city', 'N/A')}, Abstract: {match.get('combined', 'N/A')}\n"
        for match in matches
    )


def get_context(query: str) -> str:
    """Return a prompt combining *query* with its formatted search results.

    Args:
        query: The user query.

    Returns:
        A string containing the query followed by an instruction line and the
        formatted search results.
    """
    mongo_url = get_mongo_url()
    source_information = get_search_result(query, mongo_url)
    combined_information = (
        f"Query: {query}\nContinue to answer the query by using the Search Results:\n{source_information}."
    )
    return combined_information