File size: 5,369 Bytes
53c0cc8
 
 
 
 
 
 
 
 
6641fa8
53c0cc8
 
 
31b1b7e
53c0cc8
6d106b8
 
 
 
 
 
 
 
 
 
a12858e
53c0cc8
 
6641fa8
53c0cc8
6641fa8
 
 
 
 
31b1b7e
6641fa8
31b1b7e
 
 
 
 
 
 
 
 
 
6641fa8
 
31b1b7e
6641fa8
53c0cc8
c410e03
53c0cc8
c7100d5
 
 
 
 
 
 
 
 
 
 
49600c8
6641fa8
53c0cc8
 
49600c8
6641fa8
49600c8
 
 
c410e03
ceffe7d
 
49600c8
ceffe7d
 
 
53c0cc8
 
49600c8
 
 
 
 
 
ae4e744
49600c8
53c0cc8
49600c8
 
53c0cc8
ae4e744
c7100d5
 
 
 
 
 
53c0cc8
d77dd13
 
 
 
 
 
 
 
 
 
 
 
 
53c0cc8
3e9c92c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from __future__ import annotations

import json
import shutil
import subprocess
import tempfile
from datetime import datetime, timedelta
from functools import lru_cache
from pathlib import Path
from huggingface_hub import hf_hub_download

import gradio as gr

from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html, filter_graph_by_threshold

def _escape_srcdoc(text: str) -> str:
    """Escape for inclusion inside an <iframe srcdoc="…"> attribute."""
    return (
        text.replace("&", "&amp;")
            .replace("\"", "&quot;")
            .replace("'", "&#x27;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
    )


# Default value pre-filled in the "Repo / fork URL" text inputs of the UI.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"

# Hub repo id (downloaded with repo_type="dataset") from which "latest.json"
# and the per-kind JSON / HTML artifacts are fetched.
CACHE_REPO = "Molbap/hf_cached_embeds_log"

def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool):
    """Fetch a cached artifact, filter it, and render it for the UI.

    Downloads ``latest.json`` from ``CACHE_REPO``, reads its ``"sha"`` entry,
    and uses it to locate ``{kind}/{sha}/{sim_method}-m{0|1}.json`` in the same
    repo. The loaded data is passed through ``filter_graph_by_threshold`` and
    rendered to HTML.

    Args:
        kind: Artifact folder in the cache repo. ``"timeline"`` selects the
            timeline renderer; any other value uses the graph renderer.
        sim_method: Similarity method name used in the cached file name.
        threshold: Value forwarded to ``filter_graph_by_threshold``.
        multimodal: Encoded as ``m1`` / ``m0`` in the cached file name.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the rendered page wrapped in an
        ``<iframe srcdoc=…>`` string, and the path (``str``) of a temporary
        file containing the filtered data as JSON.
    """
    repo_id = CACHE_REPO
    latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    sha = info.get("sha")
    key = f"{sha}/{sim_method}-m{int(multimodal)}"
    json_fp = hf_hub_download(repo_id=repo_id, filename=f"{kind}/{key}.json", repo_type="dataset")

    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    # Both renderers are imported at module level; no local re-import needed.
    if kind == "timeline":
        raw_html = generate_timeline_html(filtered_data)
    else:
        raw_html = generate_html(filtered_data)

    iframe_html = f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # The file must outlive this call (it is handed back as a download), hence
    # delete=False. The context manager closes the handle, whereas the bare
    # mkstemp() used previously leaked one open file descriptor per call.
    suffix = "_timeline.json" if kind == "timeline" else ".json"
    with tempfile.NamedTemporaryFile(
        "w", suffix=suffix, delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(json.dumps(filtered_data))
    return iframe_html, tmp.name



def run_loc(sim_method: str, multimodal: bool):
    """Return the cached LOC page wrapped in an ``<iframe srcdoc=…>`` string.

    Reads the ``"sha"`` entry of ``latest.json`` in ``CACHE_REPO``, downloads
    ``loc/{sha}/{sim_method}-m{0|1}.html`` from the same repo, and embeds its
    escaped contents in an iframe.
    """
    pointer_path = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    sha = json.loads(Path(pointer_path).read_text(encoding="utf-8"))["sha"]

    page_name = f"loc/{sha}/{sim_method}-m{int(multimodal)}.html"
    page_path = hf_hub_download(repo_id=CACHE_REPO, filename=page_name, repo_type="dataset")

    escaped_page = _escape_srcdoc(Path(page_path).read_text(encoding="utf-8"))
    return f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{escaped_page}"></iframe>'


def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Return ``(iframe_html, json_path)`` for the cached ``"graph"`` artifact.

    ``repo_url`` is accepted for the UI callback signature but is not used.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
    )


def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Return ``(iframe_html, json_path)`` for the cached ``"timeline"`` artifact.

    ``repo_url`` is accepted for the UI callback signature but is not used.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
    )



# ───────────────────────────── UI ────────────────────────────────────────────────

# Stylesheet passed to gr.Blocks: sizes the iframes rendered inside the HTML
# components whose elem_id is "graph_html" or "timeline_html".
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""

# Three-tab Gradio app. Each tab collects a few options and, on button click,
# renders a cached artifact into an HTML component (and, for the timeline and
# graph tabs, offers the filtered JSON as a file download).
# The title and slider labels previously contained mojibake (UTF-8 decoded
# with the wrong codec); they are restored to the intended characters here.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown("## 🔍 Modular‑candidate explorer for 🤗 Transformers")

    with gr.Tabs():
        with gr.Tab("Chronological Timeline"):
            with gr.Row():
                timeline_repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                timeline_thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                timeline_multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                timeline_btn = gr.Button("Build timeline")

            timeline_html_out = gr.HTML(elem_id="timeline_html", show_label=False)
            timeline_json_out = gr.File(label="Download timeline.json")

            # The similarity method is fixed to "jaccard" for this tab; the
            # lambda adapts the three UI inputs to run_timeline's signature.
            timeline_btn.click(lambda repo, thresh, multi: run_timeline(repo, thresh, multi, "jaccard"), [timeline_repo_in, timeline_thresh, timeline_multi_cb], [timeline_html_out, timeline_json_out])

        with gr.Tab("LOC Growth"):
            sim_radio2 = gr.Radio(["jaccard","embedding"], value="jaccard", label="Similarity metric")
            multi_cb2  = gr.Checkbox(label="Only multimodal models")
            go_loc     = gr.Button("Show LOC growth")
            loc_html   = gr.HTML(show_label=False)
            go_loc.click(run_loc, [sim_radio2, multi_cb2], loc_html)

        with gr.Tab("Dependency Graph"):
            with gr.Row():
                repo_in   = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                thresh    = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                multi_cb  = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                go_btn    = gr.Button("Build graph")

            graph_html_out  = gr.HTML(elem_id="graph_html", show_label=False)
            graph_json_out  = gr.File(label="Download graph.json")

            # Same adaptation as the timeline tab: "jaccard" is hard-wired.
            go_btn.click(lambda repo, thresh, multi: run_graph(repo, thresh, multi, "jaccard"), [repo_in, thresh, multi_cb], [graph_html_out, graph_json_out])

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch(allowed_paths=["static"])