# radio-mlbee / radio_mlbee /cos_matrix2.py
# freemt
# Ready 0.1.0a0
# bccd6e8
"""Compute the cosine similarity matrix of A.
https://stackoverflow.com/questions/17627219/whats-the-fastest-way-in-python-to-calculate-cosine-similarity-given-sparse-mat
"""
from typing import Optional
import numpy as np
def _as_float_array(mat) -> np.ndarray:
    """Return mat as an ndarray with a floating (or complex) dtype."""
    if not isinstance(mat, np.ndarray):
        return np.asarray(mat, dtype=np.float32)
    if not np.issubdtype(mat.dtype, np.inexact):
        # Integer/bool arrays would make np.dot return an integer array and
        # the normalized similarities would be truncated to 0 or 1.
        return mat.astype(np.float32)
    return mat


def cos_matrix2(mat1: np.ndarray, mat2: Optional[np.ndarray] = None) -> np.ndarray:
    """Compute the pairwise cosine similarity between rows of mat1 and mat2.

    Entry ``[i, j]`` of the result is ``dot(mat1[i], mat2[j])`` divided by the
    product of the Euclidean norms of the two rows. Pairs in which either row
    has zero norm get a similarity of 0.0 instead of a division by zero.

    Args:
        mat1: 2-D array (or array-like) of shape ``(n1, d)``. Non-ndarray
            input and integer/bool arrays are converted to ``float32``.
        mat2: optional 2-D array (or array-like) of shape ``(n2, d)``.
            If omitted, ``mat1`` is compared with itself.

    Returns:
        Array of shape ``(n1, n2)`` holding the cosine similarities.

    Raises:
        SystemError: if the two inputs do not have the same number of columns.
    """
    mat1 = _as_float_array(mat1)
    # Nothing below mutates the inputs, so no copy is needed for the default.
    mat2 = mat1 if mat2 is None else _as_float_array(mat2)

    if mat1.shape[1] != mat2.shape[1]:
        # SystemError is kept for backward compatibility with existing callers.
        raise SystemError(f"shape mismatch: {mat1.shape}, {mat2.shape}")

    dots = np.dot(mat1, mat2.T)
    norm_mat = np.outer(
        np.linalg.norm(mat1, axis=1),
        np.linalg.norm(mat2, axis=1),
    )

    # Divide only where the norm product is non-zero; every other entry keeps
    # the 0.0 it was initialized with.
    cosine = np.zeros_like(dots)
    np.divide(dots, norm_mat, out=cosine, where=norm_mat != 0)
    return cosine