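# NOTE(review): the first lines of this file originally read "Spaces: / Running / Running".
# Presumably that is page-header residue from the site the script was copied from, not program text.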
import argparse
import ast
import json
import os
import re
from collections import Counter, defaultdict
def find_modular_files(transformers_path):
    """
    Collect the modular Python files of a Hugging Face Transformers checkout.

    Walks ``<transformers_path>/src/transformers/models`` recursively and keeps
    every ``.py`` file whose name contains ``'modular'``.

    Args:
        transformers_path: Path to the root of the transformers repository.

    Returns:
        A sorted list of file paths. The list is empty when the models
        directory does not exist or contains no matching file.
    """
    models_path = os.path.join(transformers_path, 'src', 'transformers', 'models')
    modular_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(models_path)
        for file in files
        if 'modular' in file and file.endswith('.py')
    ]
    # os.walk yields entries in filesystem order; sort so that everything
    # derived from this list is reproducible across machines and runs.
    return sorted(modular_files)
# Modules containing one of these markers are never treated as a model dependency.
_IGNORED_MODULE_MARKERS = ('modeling_attn_mask_utils',)


def _resolve_source_model(module, level):
    """
    Return the model package a ``from ... import`` statement points at.

    ``module`` and ``level`` are the attributes of an ``ast.ImportFrom`` node:
    the parser stores the leading dots of a relative import in ``level`` and
    never in ``module``. The relative cases assume the importing file sits
    directly in ``transformers/models/<model>/`` -- TODO confirm for nested
    model directories.

    Returns:
        The model directory name, or ``None`` when the import does not target
        a package under ``transformers.models``.
    """
    if any(marker in module for marker in _IGNORED_MODULE_MARKERS):
        return None
    parts = module.split('.')
    if level == 0:
        # Absolute form: transformers.models.<model>[.<submodule>]
        if len(parts) > 2 and parts[:2] == ['transformers', 'models']:
            return parts[2]
        return None
    if level == 2:
        # ``from ..<model>[.<submodule>] import ...`` -- a sibling model package.
        return parts[0]
    if level == 3 and len(parts) > 1 and parts[0] == 'models':
        # ``from ...models.<model>[.<submodule>] import ...``
        return parts[1]
    # level 1 is the model's own package; any other relative import targets
    # library-level modules (utils, generation, modeling_utils, ...).
    return None


def build_dependency_graph(modular_files):
    """
    Build the model dependency graph from the imports of each modular file.

    Every file is parsed with ``ast`` and each ``from ... import`` statement
    that targets another model package is recorded. The derived model name is
    the name of the directory that contains the modular file. Files that
    cannot be read or parsed are reported on stdout and skipped.

    Args:
        modular_files: Iterable of paths to modular Python files.

    Returns:
        A dict mapping each derived model name to a list of
        ``{'source': <model>, 'imported_class': <name>}`` dicts, one per
        imported name, in source order. Models without any dependency on
        another model are absent from the dict.
    """
    dependencies = defaultdict(list)
    for file_path in modular_files:
        derived_model_name = os.path.basename(os.path.dirname(file_path))
        # Only reading and parsing are best-effort; errors in the analysis
        # below should surface instead of being reported as parse failures.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                tree = ast.parse(f.read(), filename=file_path)
        except (OSError, SyntaxError, ValueError, RecursionError) as e:
            print(f"Could not parse {file_path}: {e}")
            continue

        for node in ast.walk(tree):
            if not isinstance(node, ast.ImportFrom) or not node.module:
                continue
            source_model_name = _resolve_source_model(node.module, node.level)
            if source_model_name is None or source_model_name == derived_model_name:
                continue
            for alias in node.names:
                dependencies[derived_model_name].append({
                    'source': source_model_name,
                    'imported_class': alias.name,
                })
    return dict(dependencies)
def print_debug_info(dependencies):
    """
    Print a human-readable summary of the model dependencies to stdout.

    Derived models are listed alphabetically; under each one, its source
    models are listed alphabetically together with the sorted names imported
    from them.

    Args:
        dependencies: Mapping of derived model name to a list of
            ``{'source': ..., 'imported_class': ...}`` dicts.
    """
    print("--- Model Dependency Debug ---")
    if not dependencies:
        print("No modular dependencies found.")
        return

    for derived_model, deps in sorted(dependencies.items()):
        print(f"\n🎨 Derived Model: {derived_model}")
        # Group the imported names by the model they come from.
        source_groups = defaultdict(list)
        for dep in deps:
            source_groups[dep['source']].append(dep['imported_class'])
        for source, imports in sorted(source_groups.items()):
            print(f" └── inherits from '{source}' (imports: {', '.join(sorted(imports))})")
    print("\n--------------------------")
def generate_d3_visualization(
    dependencies,
    output_filename='d3_dependency_graph.html',
    hf_logo_path='hf-logo.svg',
):
    """
    Write an interactive, zoomable D3.js HTML page for the dependency graph.

    Models that are only imported from (never a key of ``dependencies``) are
    drawn with the image at ``hf_logo_path``; models that are a key of
    ``dependencies`` are drawn as circles. All imports between one pair of
    models are merged into a single edge labelled with the number of imported
    names, and each node's ``size`` (1.0 to 3.0) grows with its number of
    distinct edges.

    The page references D3 v7 and the Inter font by URL, so the browser needs
    network access to render it.

    Args:
        dependencies: Mapping of derived model name to a list of
            ``{'source': ..., 'imported_class': ...}`` dicts.
        output_filename: Path of the HTML file to write (UTF-8).
        hf_logo_path: URL or path of the icon used for base models, as seen
            from the generated HTML file.

    Side effects:
        Writes ``output_filename`` and prints a confirmation line to stdout.
    """
    # 1. Assemble graph data ----------------------------------------------------
    derived_models = set(dependencies)
    source_models = set()
    # One entry per (source, target) pair, counting the names imported along it.
    # Insertion order is kept, so edges appear in first-seen order.
    classes_per_edge = Counter()
    for derived_model, deps in dependencies.items():
        for dep in deps:
            source_models.add(dep['source'])
            classes_per_edge[(dep['source'], derived_model)] += 1

    nodes = derived_models | source_models
    base_models = source_models - derived_models

    final_links = [
        {"source": source, "target": target, "label": f"{count} classes"}
        for (source, target), count in classes_per_edge.items()
    ]

    degree = Counter()
    for link in final_links:
        degree[link["source"]] += 1
        degree[link["target"]] += 1
    max_deg = max(degree.values(), default=1)  # default avoids dividing by zero

    node_list = [
        {
            "id": name,
            "is_base": name in base_models,
            "size": 1 + 2 * (degree[name] / max_deg),
        }
        for name in sorted(nodes)
    ]

    graph_data = {
        "nodes": node_list,
        "links": final_links,
    }

    # Values are embedded as JavaScript literals. json.dumps takes care of
    # quotes and backslashes (e.g. Windows paths); "</" is escaped so that no
    # value can terminate the surrounding <script> element early.
    graph_json = json.dumps(graph_data, indent=4).replace("</", "<\\/")
    logo_href_js = json.dumps(hf_logo_path).replace("</", "<\\/")

    # 2. Static path for the HF logo outline (unused but kept for reference) ----
    hf_svg_path = (
        "M21.2,6.7c-0.2-0.2-0.5-0.3-0.8-0.3H3.6C3.3,6.4,3,6.5,2.8,6.7s-0.3,0.5-0.3,0.8v10.8c0,0.3,0.1,0.5,0.3,0.8 "
        "c0.2,0.2,0.5,0.3,0.8,0.3h16.8c0.3,0,0.5-0.1,0.8-0.3c0.2-0.2,0.3-0.5,0.3-0.8V7.5C21.5,7.2,21.4,6.9,21.2,6.7z "
        "M12,17.8L5.9,9.4h3.1 V8.3h6v1.1h3.1L12,17.8z"
    )

    # 3. HTML / CSS / JS --------------------------------------------------------
    # Literal braces of CSS/JS are doubled because this is an f-string.
    html_template = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Transformers Modular Model Dependencies</title>
<style>
    /* Google font - small fallback cost & optional */
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');
    :root {{
        --base-size: 60px; /* icon radius helper */
    }}
    body {{
        font-family: 'Inter', Arial, sans-serif;
        margin: 0;
        overflow: hidden;
        background-color: transparent; /* requested transparency */
    }}
    svg {{
        width: 100vw;
        height: 100vh;
    }}
    .link {{
        stroke: #999;
        stroke-opacity: 0.6;
    }}
    .node-label {{
        fill: #333;
        pointer-events: none;
        text-anchor: middle;
        font-weight: 600;
    }}
    .link-label {{
        fill: #555;
        font-size: 10px;
        pointer-events: none;
        text-anchor: middle;
    }}
    .node.base path {{ fill: #ffbe0b; }}
    .node.derived circle {{ fill: #1f77b4; }}
    /* Legend styling */
    #legend {{
        position: fixed;
        top: 18px;
        left: 18px;
        font-size: 20px;
        background: rgba(255,255,255,0.92);
        padding: 18px 28px;
        border-radius: 10px;
        border: 1.5px solid #bbb;
        font-family: 'Inter', Arial, sans-serif;
        box-shadow: 0 2px 8px rgba(0,0,0,0.08);
        z-index: 1000;
    }}
</style>
</head>
<body>
<div id="legend">🟡 base model (HF icon)<br>🔵 derived modular model<br>Edge label: #classes imported</div>
<svg id="dependency-graph"></svg>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
    const graphData = {graph_json};
    const hfLogoPath = "{hf_svg_path}"; // kept for potential future use
    const width = window.innerWidth;
    const height = window.innerHeight;
    // Icon edge length in px, read once from the CSS custom property.
    const baseSize = parseFloat(
        getComputedStyle(document.documentElement).getPropertyValue('--base-size')
    );

    const svg = d3.select('#dependency-graph')
        .call(
            d3.zoom().on('zoom', (event) => {{
                g.attr('transform', event.transform);
            }})
        );
    const g = svg.append('g');

    // Forces - tweaked for tighter graph
    const simulation = d3.forceSimulation(graphData.nodes)
        .force('link', d3.forceLink(graphData.links).id(d => d.id).distance(500))
        .force('charge', d3.forceManyBody().strength(-500))
        .force('center', d3.forceCenter(width / 2, height / 2))
        .force('collide', d3.forceCollide(0.01 * baseSize));

    // Links
    const link = g.append('g')
        .selectAll('line')
        .data(graphData.links)
        .join('line')
        .attr('class', 'link')
        .attr('stroke-width', 1.5);

    // Link labels (#classes)
    const linkLabel = g.append('g')
        .selectAll('text')
        .data(graphData.links)
        .join('text')
        .attr('class', 'link-label')
        .text(d => d.label);

    // Nodes (base vs derived)
    const node = g.append('g')
        .selectAll('g')
        .data(graphData.nodes)
        .join('g')
        .attr('class', d => d.is_base ? 'node base' : 'node derived')
        .call(d3.drag()
            .on('start', dragstarted)
            .on('drag', dragged)
            .on('end', dragended)
        );

    // Base-model icon (HF logo)
    node.filter(d => d.is_base)
        .append('image')
        .attr('xlink:href', {logo_href_js})
        .attr('x', -baseSize / 2)
        .attr('y', -baseSize / 2)
        .attr('width', baseSize)
        .attr('height', baseSize);

    // Base-model label (below icon)
    node.filter(d => d.is_base)
        .append('text')
        .attr('class', 'node-label')
        .attr('y', d => 30 * d.size + 8) // keep under the icon
        .style('font-size', d => `${{26 * d.size}}px`) // scale 26-78 px for size 1-3
        .text(d => d.id);

    // Derived-model circle + label w/ background rect
    const derived = node.filter(d => !d.is_base);
    derived.append('circle')
        .attr('r', d => 20 * d.size); // scaled
    const labelGroup = derived.append('g').attr('class', 'label-group');
    labelGroup.append('rect')
        .attr('x', -45)
        .attr('y', -18)
        .attr('width', 90)
        .attr('height', 36)
        .attr('rx', 8)
        .attr('fill', '#fffbe6')
        .attr('stroke', '#ccc');
    labelGroup.append('text')
        .attr('class', 'node-label')
        .attr('dy', '0.35em')
        .style('font-size', '18px')
        .text(d => d.id);

    // Tick
    simulation.on('tick', () => {{
        link.attr('x1', d => d.source.x)
            .attr('y1', d => d.source.y)
            .attr('x2', d => d.target.x)
            .attr('y2', d => d.target.y);
        linkLabel.attr('x', d => (d.source.x + d.target.x) / 2)
            .attr('y', d => (d.source.y + d.target.y) / 2);
        node.attr('transform', d => `translate(${{d.x}}, ${{d.y}})`);
    }});

    // Drag helpers
    function dragstarted(event, d) {{
        if (!event.active) simulation.alphaTarget(0.3).restart();
        d.fx = d.x; d.fy = d.y;
    }}
    function dragged(event, d) {{
        d.fx = event.x; d.fy = event.y;
    }}
    function dragended(event, d) {{
        if (!event.active) simulation.alphaTarget(0);
        d.fx = null; d.fy = null;
    }}
</script>
</body>
</html>
"""
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(html_template)
    print(f"✅ D3.js visualization saved to '{output_filename}'. Open this file in your browser.")
def main():
    """
    Command-line entry point.

    Reads the path of a local transformers checkout from the command line,
    collects its modular files, prints the dependency summary and writes the
    D3.js visualization with its default output settings.
    """
    parser = argparse.ArgumentParser(
        description="Visualize modular model dependencies in Transformers using D3.js.")
    parser.add_argument("transformers_path", type=str,
                        help="The local path to the Hugging Face transformers repository.")
    args = parser.parse_args()

    modular_files = find_modular_files(args.transformers_path)
    if not modular_files:
        print("No modular files found. Make sure the path to the transformers repository is correct.")
        return

    dependencies = build_dependency_graph(modular_files)
    print_debug_info(dependencies)
    generate_d3_visualization(dependencies)


if __name__ == "__main__":
    main()