Spaces:
Running
Running
File size: 6,461 Bytes
b200bda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
""" This module provides the functions for node classification problem.
The functions in this module are not imported
into the top level `networkx` namespace.
You can access these functions by importing
the `networkx.algorithms.node_classification` modules,
then accessing the functions as attributes of `node_classification`.
For example:
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> node_classification.harmonic_function(G)
['A', 'A', 'B', 'B']
References
----------
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
Semi-supervised learning using gaussian fields and harmonic functions.
In ICML (Vol. 3, pp. 912-919).
"""
import networkx as nx
__all__ = ["harmonic_function", "local_and_global_consistency"]
@nx.utils.not_implemented_for("directed")
@nx._dispatch(node_attrs="label_name")
def harmonic_function(G, max_iter=30, label_name="label"):
"""Node classification by Harmonic function
Function for computing Harmonic function algorithm by Zhu et al.
Parameters
----------
G : NetworkX Graph
max_iter : int
maximum number of iterations allowed
label_name : string
name of target labels to predict
Returns
-------
predicted : list
List of length ``len(G)`` with the predicted labels for each node.
Raises
------
NetworkXError
If no nodes in `G` have attribute `label_name`.
Examples
--------
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> G.nodes(data=True)
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> predicted = node_classification.harmonic_function(G)
>>> predicted
['A', 'A', 'B', 'B']
References
----------
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
Semi-supervised learning using gaussian fields and harmonic functions.
In ICML (Vol. 3, pp. 912-919).
"""
import numpy as np
import scipy as sp
X = nx.to_scipy_sparse_array(G) # adjacency matrix
labels, label_dict = _get_label_info(G, label_name)
if labels.shape[0] == 0:
raise nx.NetworkXError(
f"No node on the input graph is labeled by '{label_name}'."
)
n_samples = X.shape[0]
n_classes = label_dict.shape[0]
F = np.zeros((n_samples, n_classes))
# Build propagation matrix
degrees = X.sum(axis=0)
degrees[degrees == 0] = 1 # Avoid division by 0
# TODO: csr_array
D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
P = (D @ X).tolil()
P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
# Build base matrix
B = np.zeros((n_samples, n_classes))
B[labels[:, 0], labels[:, 1]] = 1
for _ in range(max_iter):
F = (P @ F) + B
return label_dict[np.argmax(F, axis=1)].tolist()
@nx.utils.not_implemented_for("directed")
@nx._dispatch(node_attrs="label_name")
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
"""Node classification by Local and Global Consistency
Function for computing Local and global consistency algorithm by Zhou et al.
Parameters
----------
G : NetworkX Graph
alpha : float
Clamping factor
max_iter : int
Maximum number of iterations allowed
label_name : string
Name of target labels to predict
Returns
-------
predicted : list
List of length ``len(G)`` with the predicted labels for each node.
Raises
------
NetworkXError
If no nodes in `G` have attribute `label_name`.
Examples
--------
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> G.nodes(data=True)
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> predicted = node_classification.local_and_global_consistency(G)
>>> predicted
['A', 'A', 'B', 'B']
References
----------
Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
Learning with local and global consistency.
Advances in neural information processing systems, 16(16), 321-328.
"""
import numpy as np
import scipy as sp
X = nx.to_scipy_sparse_array(G) # adjacency matrix
labels, label_dict = _get_label_info(G, label_name)
if labels.shape[0] == 0:
raise nx.NetworkXError(
f"No node on the input graph is labeled by '{label_name}'."
)
n_samples = X.shape[0]
n_classes = label_dict.shape[0]
F = np.zeros((n_samples, n_classes))
# Build propagation matrix
degrees = X.sum(axis=0)
degrees[degrees == 0] = 1 # Avoid division by 0
# TODO: csr_array
D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
P = alpha * ((D2 @ X) @ D2)
# Build base matrix
B = np.zeros((n_samples, n_classes))
B[labels[:, 0], labels[:, 1]] = 1 - alpha
for _ in range(max_iter):
F = (P @ F) + B
return label_dict[np.argmax(F, axis=1)].tolist()
def _get_label_info(G, label_name):
"""Get and return information of labels from the input graph
Parameters
----------
G : Network X graph
label_name : string
Name of the target label
Returns
-------
labels : numpy array, shape = [n_labeled_samples, 2]
Array of pairs of labeled node ID and label ID
label_dict : numpy array, shape = [n_classes]
Array of labels
i-th element contains the label corresponding label ID `i`
"""
import numpy as np
labels = []
label_to_id = {}
lid = 0
for i, n in enumerate(G.nodes(data=True)):
if label_name in n[1]:
label = n[1][label_name]
if label not in label_to_id:
label_to_id[label] = lid
lid += 1
labels.append([i, label_to_id[label]])
labels = np.array(labels)
label_dict = np.array(
[label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
)
return (labels, label_dict)
|