Gabriel committed on
Commit
fe74ce1
1 Parent(s): c1e3d1b

Update LexRank.py

Browse files
Files changed (1) hide show
  1. LexRank.py +120 -0
LexRank.py CHANGED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from scipy.sparse.csgraph import connected_components
4
+ from scipy.special import softmax
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
def degree_centrality_scores(
    similarity_matrix,
    threshold=None,
    increase_power=True,
):
    """Compute a centrality score for every row of a similarity matrix.

    The similarity matrix is first turned into a Markov transition matrix
    and the scores are the (unnormalized) stationary distribution of it.

    Args:
        similarity_matrix: Square matrix of pairwise similarities.
        threshold: ``None`` to use the similarities as continuous weights,
            or a ``float`` in ``[0, 1)`` to binarize them first (entries
            ``>= threshold`` count as links).
        increase_power: Forwarded to ``stationary_distribution``.

    Returns:
        The result of ``stationary_distribution`` called with
        ``normalized=False``.

    Raises:
        ValueError: If ``threshold`` is neither ``None`` nor a ``float``
            in ``[0, 1)``. Note that plain integers (including ``0``) are
            rejected by the ``isinstance(..., float)`` check.
    """
    threshold_is_valid = threshold is None or (
        isinstance(threshold, float) and 0 <= threshold < 1
    )
    if not threshold_is_valid:
        raise ValueError(
            '\'threshold\' should be a floating-point number '
            'from the interval [0, 1) or None',
        )

    if threshold is not None:
        # Binarize the similarities before building the transition matrix.
        markov_matrix = create_markov_matrix_discrete(
            similarity_matrix,
            threshold,
        )
    else:
        markov_matrix = create_markov_matrix(similarity_matrix)

    return stationary_distribution(
        markov_matrix,
        increase_power=increase_power,
        normalized=False,
    )
40
+
41
+
42
def _power_method(transition_matrix, increase_power=True, max_iter=10000):
    """Approximate the stationary vector of a transition matrix by iteration.

    Starting from a vector of ones, the transposed matrix is applied
    repeatedly until two successive iterates agree under ``np.allclose``.

    Args:
        transition_matrix: Square array; its transpose is applied to the
            current iterate on every step.
        increase_power: When true, the applied matrix is squared after
            every step, so each step advances further than the last.
        max_iter: Upper bound on the number of steps.

    Returns:
        The last computed iterate (a 1-D array with one entry per state).
        If ``max_iter`` steps pass without convergence a warning is logged
        and the most recent iterate is returned anyway.
    """
    eigenvector = np.ones(len(transition_matrix))

    # A single state needs no iteration.
    if len(eigenvector) == 1:
        return eigenvector

    transition = transition_matrix.transpose()

    for _ in range(max_iter):
        eigenvector_next = np.dot(transition, eigenvector)

        if np.allclose(eigenvector_next, eigenvector):
            return eigenvector_next

        eigenvector = eigenvector_next

        if increase_power:
            # Squaring the operator doubles the effective step size.
            transition = np.dot(transition, transition)

    logger.warning("Maximum number of iterations for power method exceeded without convergence!")
    # `eigenvector` is the latest iterate after any completed step, and is
    # still the initial vector when the loop body never ran (max_iter <= 0),
    # where the previous `eigenvector_next` would have been unbound.
    return eigenvector
63
+
64
+
65
def connected_nodes(matrix):
    """Split the node indices of a graph into its connected components.

    Args:
        matrix: Adjacency-style matrix handed unchanged to SciPy's
            ``connected_components`` (called with its default options).

    Returns:
        A list of 1-D index arrays, one per component label, ordered by
        ascending label.
    """
    _, component_of = connected_components(matrix)

    return [
        np.flatnonzero(component_of == component)
        for component in np.unique(component_of)
    ]
75
+
76
+
77
+ def create_markov_matrix(weights_matrix):
78
+ n_1, n_2 = weights_matrix.shape
79
+ if n_1 != n_2:
80
+ raise ValueError('\'weights_matrix\' should be square')
81
+
82
+ row_sum = weights_matrix.sum(axis=1, keepdims=True)
83
+
84
+ # normalize probability distribution differently if we have negative transition values
85
+ if np.min(weights_matrix) <= 0:
86
+ return softmax(weights_matrix, axis=1)
87
+
88
+ return weights_matrix / row_sum
89
+
90
+
91
def create_markov_matrix_discrete(weights_matrix, threshold):
    """Binarize a weight matrix and build a Markov matrix from the result.

    Args:
        weights_matrix: 2-D array of weights.
        threshold: Entries ``>= threshold`` become 1, all others 0.

    Returns:
        Whatever ``create_markov_matrix`` returns for the 0/1 matrix.
    """
    adjacency = np.zeros(weights_matrix.shape)
    adjacency[weights_matrix >= threshold] = 1

    # NOTE(review): the 0/1 matrix contains zeros unless every entry meets
    # the threshold, which selects the softmax branch of
    # create_markov_matrix -- confirm that this is intended.
    return create_markov_matrix(adjacency)
97
+
98
+
99
def stationary_distribution(
    transition_matrix,
    increase_power=True,
    normalized=True,
):
    """Compute a stationary vector of a transition matrix, per component.

    The states are split into connected components; the power method is run
    on each component's sub-matrix and the results are written back into
    one vector at the component's indices.

    Args:
        transition_matrix: Square 2-D array of transition probabilities.
        increase_power: Forwarded to ``_power_method``.
        normalized: When true, the assembled vector is divided by the
            total number of states before it is returned.

    Returns:
        A 1-D float array with one entry per state.

    Raises:
        ValueError: If the matrix is not square.
    """
    size, width = transition_matrix.shape
    if size != width:
        raise ValueError('\'transition_matrix\' should be square')

    distribution = np.zeros(size)

    for members in connected_nodes(transition_matrix):
        # Restrict the chain to the states of this component only.
        sub_matrix = transition_matrix[np.ix_(members, members)]
        distribution[members] = _power_method(
            sub_matrix,
            increase_power=increase_power,
        )

    if normalized:
        distribution /= size

    return distribution