Spaces:
Runtime error
Runtime error
""" | |
================== | |
Words/Ladder Graph | |
================== | |
Generate an undirected graph over the 5757 5-letter words in the datafile | |
`words_dat.txt.gz`. Two words are connected by an edge if they differ in one | |
letter, resulting in 14,135 edges. This example is described in Section 1.1 of | |
Donald E. Knuth, "The Stanford GraphBase: A Platform for Combinatorial | |
Computing", ACM Press, New York, 1993. | |
http://www-cs-faculty.stanford.edu/~knuth/sgb.html | |
The data file can be found at: | |
- https://github.com/networkx/networkx/blob/main/examples/graph/words_dat.txt.gz | |
""" | |
import gzip | |
from string import ascii_lowercase as lowercase | |
import matplotlib.pyplot as plt | |
import networkx as nx | |
def generate_graph(words): | |
G = nx.Graph(name="words") | |
lookup = {c: lowercase.index(c) for c in lowercase} | |
def edit_distance_one(word): | |
for i in range(len(word)): | |
left, c, right = word[0:i], word[i], word[i + 1 :] | |
j = lookup[c] # lowercase.index(c) | |
for cc in lowercase[j + 1 :]: | |
yield left + cc + right | |
candgen = ( | |
(word, cand) | |
for word in sorted(words) | |
for cand in edit_distance_one(word) | |
if cand in words | |
) | |
G.add_nodes_from(words) | |
for word, cand in candgen: | |
G.add_edge(word, cand) | |
return G | |
def words_graph(): | |
"""Return the words example graph from the Stanford GraphBase""" | |
fh = gzip.open("words_dat.txt.gz", "r") | |
words = set() | |
for line in fh.readlines(): | |
line = line.decode() | |
if line.startswith("*"): | |
continue | |
w = str(line[0:5]) | |
words.add(w) | |
return generate_graph(words) | |
G = words_graph() | |
print("Loaded words_dat.txt containing 5757 five-letter English words.") | |
print("Two words are connected if they differ in one letter.") | |
print(G) | |
print(f"{nx.number_connected_components(G)} connected components") | |
for source, target in [("chaos", "order"), ("nodes", "graph"), ("pound", "marks")]: | |
print(f"Shortest path between {source} and {target} is") | |
try: | |
shortest_path = nx.shortest_path(G, source, target) | |
for n in shortest_path: | |
print(n) | |
except nx.NetworkXNoPath: | |
print("None") | |
# draw a subset of the graph | |
boundary = list(nx.node_boundary(G, shortest_path)) | |
G.add_nodes_from(shortest_path, color="red") | |
G.add_nodes_from(boundary, color="blue") | |
H = G.subgraph(shortest_path + boundary) | |
colors = nx.get_node_attributes(H, "color") | |
options = {"node_size": 1500, "alpha": 0.3, "node_color": colors.values()} | |
pos = nx.kamada_kawai_layout(H) | |
nx.draw(H, pos, **options) | |
nx.draw_networkx_labels(H, pos, font_weight="bold") | |
plt.show() | |