Spaces:
Sleeping
Sleeping
import numpy as np | |
class SimilarityScorer:
    """Compute similarity scores between vectorized prompts."""

    def cosine_similarity(
        self,
        query_vector: np.ndarray,
        corpus_vectors: np.ndarray,
    ) -> np.ndarray:
        """Calculate cosine similarity between a query and each corpus row.

        Args:
            query_vector: Vectorized prompt query of shape (1, D) or (D,).
            corpus_vectors: Vectorized prompt corpus of shape (N, D).

        Returns:
            The vector of shape (N,) with values in range [-1, 1] where 1
            is max similarity, i.e. the two vectors point in the same
            direction.

        Raises:
            ValueError: If the query vector is all zeros, if the corpus
                contains an all-zero row, if the corpus is not 2-D, or if
                the query and corpus dimensionalities differ.
        """
        # Flatten the query so that a (1, D) input yields an (N,) result
        # rather than the (N, 1) column a 2-D dot product would produce.
        query = np.asarray(query_vector).reshape(-1)
        query_norm = np.linalg.norm(query)
        if query_norm == 0:
            raise ValueError("The query vector cannot be zero.")

        corpus = np.asarray(corpus_vectors)
        if corpus.ndim != 2:
            raise ValueError(
                "The corpus must be a 2-D array of shape (N, D); "
                f"got {corpus.ndim} dimension(s)."
            )
        if corpus.shape[1] != query.shape[0]:
            raise ValueError(
                "Dimension mismatch: the query has "
                f"{query.shape[0]} component(s) but corpus rows have "
                f"{corpus.shape[1]}."
            )

        # Row-wise L2 norms; a zero row has no direction to compare against.
        corpus_norms = np.linalg.norm(corpus, axis=1)
        if np.any(corpus_norms == 0):
            raise ValueError("The corpus contains zero vectors.")

        unit_query = query / query_norm
        unit_corpus = corpus / corpus_norms[:, np.newaxis]

        # The dot product of unit vectors is the cosine of their angle.
        similarities = np.dot(unit_corpus, unit_query)

        # Round-off can push results a few ULPs outside [-1, 1]; clip so the
        # documented range holds (e.g. for a later arccos).
        return np.clip(similarities, -1.0, 1.0)