File size: 1,338 Bytes
ce0d4fb
ba93a7e
ce0d4fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
from numba import jit, prange


# Reference: fastdist, https://github.com/talboger/fastdist

@jit(nopython=True, fastmath=True)
def euclidean_vector_to_matrix_distance(u, m):
    """
    :purpose:
    Computes the Euclidean (L2) distance between a vector and each row of a matrix

    :params:
    u      : input vector of shape (n,)
    m      : input matrix of shape (m, n)

    :returns:
    distance vector  : np.array of dtype float32 and shape (m,); entry i is the
                       Euclidean distance between u and row i of m

    :raises:
    ValueError : if u has more elements than m has columns

    :example:
    >>> import numpy as np
    >>> u = np.random.RandomState(seed=0).rand(10)
    >>> m = np.random.RandomState(seed=0).rand(100, 10)
    >>> euclidean_vector_to_matrix_distance(u, m)
    (returns an array of shape (100,))

    :note:
    if u has fewer elements than m has columns, only the first len(u) columns
    of each row are compared (no error is raised).
    the row loop uses prange, but the decorator does not set parallel=True, so
    per the numba documentation it runs like a plain range loop.
    """

    n_rows = m.shape[0]
    dim = len(u)

    # Array indexing is not bounds-checked by default in nopython mode, so an
    # over-long u would silently read past the end of each row of m.
    if dim > m.shape[1]:
        raise ValueError("u has more elements than m has columns")

    out = np.zeros((n_rows), dtype=np.float32)
    for i in prange(n_rows):
        # Float accumulator keeps the variable's type stable across the loop.
        sq_sum = 0.0
        for k in range(dim):
            # abs() keeps the squared term real even for complex inputs.
            diff = abs(u[k] - m[i, k])
            sq_sum += diff * diff
        out[i] = np.sqrt(sq_sum)

    return out