"""Tiny semantic prompt search built on averaged pre-trained word vectors."""

from gensim.models import KeyedVectors
from gensim.utils import simple_preprocess
import gensim.downloader as api
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


class SearchEngine:
    """Store prompts and retrieve the one most similar to a query.

    Each prompt is represented by the mean of the word vectors of its
    in-vocabulary tokens; similarity between prompts is cosine similarity.
    """

    def __init__(self, model_api_path='glove-wiki-gigaword-100'):
        """Load the word-vector model and start with an empty index.

        Args:
            model_api_path: Name passed to ``gensim.downloader.load``.
        """
        self.model = api.load(model_api_path)
        # Parallel lists: self.vectors[i] is the embedding of self.prompts[i].
        self.prompts = []
        self.vectors = []

    def vectorize(self, prompt: str) -> np.ndarray:
        """Return the mean word vector of ``prompt``.

        The prompt is tokenized with ``simple_preprocess`` and tokens missing
        from the model are skipped.  If no token is known to the model, a
        zero vector of the model's dimensionality is returned instead of the
        scalar NaN that ``np.mean`` yields for an empty list, so stored
        vectors always share one shape.
        """
        tokens = simple_preprocess(prompt)
        known = [self.model[token] for token in tokens if token in self.model]
        if not known:
            return np.zeros(self.model.vector_size, dtype=np.float32)
        return np.mean(known, axis=0)

    def add(self, prompt: str) -> None:
        """Embed ``prompt`` and append it to the index."""
        vector = self.vectorize(prompt)
        self.prompts.append(prompt)
        self.vectors.append(vector)

    def search(self, input_prompt: str) -> str:
        """Return the stored prompt with the highest cosine similarity.

        Args:
            input_prompt: Query text.

        Returns:
            The stored prompt whose vector is closest to the query vector.

        Raises:
            ValueError: If no prompts have been added, or if the query
                contains no word known to the model (there is then nothing
                meaningful to compare against).
        """
        if not self.vectors:
            raise ValueError("cannot search: no prompts have been added")

        query_vector = self.vectorize(input_prompt)
        if not np.any(query_vector):
            # An all-zero query has similarity 0 to everything, so argmax
            # would just return the first stored prompt; fail loudly instead.
            raise ValueError(
                "cannot search: input prompt has no in-vocabulary words"
            )

        similarity_scores = cosine_similarity([query_vector], self.vectors)[0]
        most_similar_idx = int(np.argmax(similarity_scores))
        return self.prompts[most_similar_idx]


def main():
    """Run a small demonstration of the search engine."""
    storage = SearchEngine()
    storage.add("I love programming")
    storage.add("You need to graduate")
    storage.add("Library is open")

    input_prompt = "I enjoy coding"
    most_similar_prompt = storage.search(input_prompt)
    print(f"Input Prompt: {input_prompt}")
    print(f"Most Similar Prompt: {most_similar_prompt}")


if __name__ == "__main__":
    main()