File size: 4,609 Bytes
35e5b82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
import json
import gzip
import xml.etree.ElementTree as ET
import sys
import os

# Colours assigned to nodes that carry a "type" but no explicit r/g/b colour.
_TYPE_DEFAULT_COLORS = {
    'author': 'rgb(154,150,229)',
    'paper': 'rgb(229,150,154)',
}
_OTHER_TYPE_COLOR = 'rgb(150,229,154)'


def _find_children(parent, tag, ns):
    """Return the direct children of *parent* named *tag*.

    The GraphML-namespaced name is tried first; when nothing matches, the
    bare tag name is used so files written without a namespace still load.
    """
    return parent.findall(f'graphml:{tag}', ns) or parent.findall(tag)


def _find_data(parent, key, ns):
    """Return the first ``<data key="...">`` child of *parent*, or ``None``.

    The lookup result is compared against ``None`` explicitly: an Element
    without child elements evaluates as falsy, so chaining two ``find`` calls
    with ``or`` would discard a successful namespaced match.
    """
    found = parent.find(f'graphml:data[@key="{key}"]', ns)
    if found is None:
        found = parent.find(f'data[@key="{key}"]')
    return found


def _node_to_sigma(node, ns):
    """Build the SigmaJS node dict for one GraphML ``<node>`` element."""
    node_data = {'id': node.get('id'), 'attr': {'colors': {}}}
    type_color = None

    for data in _find_children(node, 'data', ns):
        key = data.get('key')
        if key == 'label':
            node_data['label'] = data.text
        elif key in ('x', 'y', 'size'):
            node_data[key] = float(data.text)
        elif key == 'r':
            # The colour is only emitted when all three channels are present.
            g_elem = _find_data(node, 'g', ns)
            b_elem = _find_data(node, 'b', ns)
            if g_elem is not None and b_elem is not None:
                node_data['color'] = f"rgb({data.text},{g_elem.text},{b_elem.text})"
        elif key == 'type':
            node_data['attr']['colors']['type'] = data.text
            type_color = _TYPE_DEFAULT_COLORS.get(data.text, _OTHER_TYPE_COLOR)

    # The type colour is a default: it must not replace an explicit r/g/b
    # colour, whatever order the <data> elements appear in.
    if type_color is not None:
        node_data.setdefault('color', type_color)
    return node_data


def _edge_to_sigma(edge, index, ns):
    """Build the SigmaJS edge dict for one GraphML ``<edge>`` element.

    *index* is the edge's position in the graph and becomes its ``e<index>`` id.
    """
    edge_data = {
        'id': f"e{index}",
        'source': edge.get('source'),
        'target': edge.get('target'),
    }
    for data in _find_children(edge, 'data', ns):
        key = data.get('key')
        if key == 'weight':
            edge_data['weight'] = float(data.text)
        elif key == 'edgelabel':
            edge_data['label'] = data.text
    return edge_data


def graphml_to_json(graphml_file, output_json, compressed_output=None):
    """
    Convert a GraphML file to SigmaJS-compatible JSON format.

    Args:
        graphml_file: Path of the GraphML file to read.
        output_json: Path of the JSON file to write. The document has the
            shape ``{"nodes": [...], "edges": [...]}``.
        compressed_output: Optional path; when given, a gzip-compressed copy
            of the JSON file is written there as well.

    Raises:
        ValueError: If the document has no ``graph`` element, or a numeric
            attribute (x, y, size, weight) holds non-numeric text.
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        OSError: If a file cannot be read or written.
    """
    print(f"Parsing GraphML file: {graphml_file}")
    root = ET.parse(graphml_file).getroot()

    ns = {'graphml': 'http://graphml.graphdrawing.org/xmlns'}

    # Prefer the namespaced <graph>; fall back to a namespace-less document.
    graph = root.find('graphml:graph', ns)
    if graph is None:
        graph = root.find('graph')
    if graph is None:
        raise ValueError("Could not find graph element in GraphML file")

    print("Processing nodes...")
    nodes = [_node_to_sigma(node, ns) for node in _find_children(graph, 'node', ns)]
    print(f"Processed {len(nodes)} nodes")

    print("Processing edges...")
    edges = [
        _edge_to_sigma(edge, index, ns)
        for index, edge in enumerate(_find_children(graph, 'edge', ns))
    ]
    print(f"Processed {len(edges)} edges")

    sigma_data = {'nodes': nodes, 'edges': edges}

    print(f"Writing JSON to {output_json}")
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(sigma_data, f)

    # If compressed output is requested, gzip the exact bytes just written.
    if compressed_output:
        print(f"Creating compressed file: {compressed_output}")
        with open(output_json, 'rb') as f_in:
            data = f_in.read()
        with gzip.open(compressed_output, 'wb', compresslevel=9) as f_out:
            f_out.write(data)

if __name__ == '__main__':
    # Command-line entry point:
    #   <input_graphml> <output_json> [compressed_output]
    # Usage and error messages are written to stderr so they are not mixed
    # into the progress output on stdout; the exit status is 1 in both cases.
    if len(sys.argv) < 3:
        print(
            "Usage: python graphml_to_json.py <input_graphml> <output_json> [compressed_output]",
            file=sys.stderr,
        )
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    compressed_file = sys.argv[3] if len(sys.argv) > 3 else None

    # Top-level boundary: report any conversion failure and exit non-zero.
    try:
        graphml_to_json(input_file, output_file, compressed_file)
    except Exception as e:
        print(f"Error during conversion: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Conversion completed. JSON saved to {output_file}")
    if compressed_file:
        print(f"Compressed version saved to {compressed_file}")