Spaces:
Running
Running
from gensim.models import KeyedVectors | |
from gensim.utils import simple_preprocess | |
import gensim.downloader as api | |
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
class SearchEngine:
    """Find the stored prompt most similar to a query prompt.

    Each prompt is represented by the mean of the embedding vectors of its
    in-vocabulary tokens; similarity is cosine similarity between those
    mean vectors.
    """

    def __init__(self, model_api_path='glove-wiki-gigaword-100'):
        """Load the embedding model and start with an empty index.

        Args:
            model_api_path: Model name handed to ``gensim.downloader.load``.
        """
        self.model = api.load(model_api_path)
        self.prompts = []  # original prompt strings, in insertion order
        self.vectors = []  # one mean embedding per entry of self.prompts

    def vectorize(self, prompt):
        """Return the mean embedding of the in-vocabulary tokens of *prompt*.

        Tokens are produced by ``simple_preprocess``; tokens missing from the
        model are skipped. If no token is in the vocabulary (including an
        empty prompt), a zero vector is returned instead of the NaN scalar
        that ``np.mean`` yields for an empty list, so stored vectors always
        share one shape.
        """
        words = simple_preprocess(prompt)
        vectors = [self.model[word] for word in words if word in self.model]
        if not vectors:
            # NOTE(review): relies on the loaded model exposing `vector_size`
            # (gensim KeyedVectors does) -- confirm for other model types.
            return np.zeros(self.model.vector_size)
        return np.mean(vectors, axis=0)

    def add(self, prompt):
        """Vectorize *prompt* and append it to the index."""
        vector = self.vectorize(prompt)
        self.prompts.append(prompt)
        self.vectors.append(vector)

    def search(self, input_prompt):
        """Return the stored prompt with the highest cosine similarity.

        Ties (for example when the query has no in-vocabulary tokens and is
        therefore a zero vector) resolve to the earliest added prompt, since
        ``np.argmax`` returns the first maximum.

        Raises:
            ValueError: If no prompts have been added yet.
        """
        if not self.prompts:
            raise ValueError("cannot search: no prompts have been added")
        vectorized_input = self.vectorize(input_prompt)
        similarity_scores = cosine_similarity([vectorized_input], self.vectors)[0]
        most_similar_idx = int(np.argmax(similarity_scores))
        return self.prompts[most_similar_idx]
if __name__ == "__main__":
    # Small demo: index a few prompts, then look up the closest one.
    engine = SearchEngine()
    for seed_prompt in (
        "I love programming",
        "You need to graduate",
        "Library is open",
    ):
        engine.add(seed_prompt)

    input_prompt = "I enjoy coding"
    most_similar_prompt = engine.search(input_prompt)

    print(f"Input Prompt: {input_prompt}")
    print(f"Most Similar Prompt: {most_similar_prompt}")