# prompt-engine/src/similarity_scorer.py
# Lazar Radojevic
# Commit be043a6: "working version"
import numpy as np
class SimilarityScorer:
    """Score how closely one query embedding matches each corpus embedding."""

    def cosine_similarity(
        self,
        query_vector: np.ndarray,
        corpus_vectors: np.ndarray,
    ) -> np.ndarray:
        """Calculate cosine similarity between a query and every corpus vector.

        Args:
            query_vector: A single vectorized prompt query of shape (1, D)
                or (D,). The whole array is scaled by one norm, so several
                stacked queries are not supported.
            corpus_vectors: Vectorized prompt corpus of shape (N, D).

        Returns:
            Similarities clipped to the range [-1, 1], where 1 means the two
            vectors point in the same direction. The shape follows
            ``np.dot``: (N, 1) for a (1, D) query and (N,) for a (D,) query.

        Raises:
            ValueError: If the query vector, or any row of the corpus, has
                zero norm (cosine similarity is undefined for it).
        """
        # Scale the query to unit length.
        query_norm = np.linalg.norm(query_vector)
        if query_norm == 0:
            raise ValueError("The query vector cannot be zero.")
        unit_query = query_vector / query_norm

        # Scale every corpus row to unit length; keepdims gives an (N, 1)
        # column of norms that broadcasts across the D components.
        corpus_norms = np.linalg.norm(corpus_vectors, axis=1, keepdims=True)
        if np.any(corpus_norms == 0):
            raise ValueError("The corpus contains zero vectors.")
        unit_corpus = corpus_vectors / corpus_norms

        # The dot product of unit vectors is the cosine of the angle.
        similarities = np.dot(unit_corpus, unit_query.T)

        # Rounding in normalise-then-dot can overshoot by an ulp (e.g.
        # 1.0000000000000002); clip so the documented range always holds.
        return np.clip(similarities, -1.0, 1.0)