Spaces:
Running
Running
fix
Browse files
app.py
CHANGED
|
@@ -12,44 +12,44 @@ from pathlib import Path
|
|
| 12 |
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
-
# ──
|
| 16 |
-
# • build_graph_json(transformers_dir: Path, threshold: float, multimodal: bool, sim_method: str) -> dict
|
| 17 |
-
# • generate_html(graph: dict) -> str (returns full <html>… string)
|
| 18 |
from modular_graph_and_candidates import build_graph_json, generate_html
|
| 19 |
|
| 20 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
| 21 |
|
|
|
|
| 22 |
|
| 23 |
@lru_cache(maxsize=4)
|
| 24 |
def clone_or_cache(repo_url: str) -> Path:
|
| 25 |
-
"""
|
| 26 |
-
|
| 27 |
-
The repo is cached under /tmp/<hash>. A hidden ``.cloned_at`` file stores the
|
| 28 |
-
UTC ISO timestamp of the last clone; if that stamp is < 24 h old we reuse the
|
| 29 |
-
existing checkout, otherwise we wipe the directory and clone afresh. This
|
| 30 |
-
guarantees deterministic daily snapshots while avoiding repeated network
|
| 31 |
-
cost within the same day (even across independent Space sessions if the
|
| 32 |
-
container persists).
|
| 33 |
-
"""
|
| 34 |
tmp_root = Path(tempfile.gettempdir())
|
| 35 |
cache_dir = tmp_root / f"repo_{abs(hash(repo_url))}"
|
| 36 |
stamp = cache_dir / ".cloned_at"
|
| 37 |
|
| 38 |
if cache_dir.exists() and stamp.exists():
|
| 39 |
try:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
return cache_dir # fresh enough
|
| 43 |
except Exception:
|
| 44 |
-
#
|
| 45 |
-
pass
|
| 46 |
-
# stale cache → remove dir completely
|
| 47 |
shutil.rmtree(cache_dir, ignore_errors=True)
|
| 48 |
|
| 49 |
subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(cache_dir)])
|
| 50 |
stamp.write_text(datetime.utcnow().isoformat())
|
| 51 |
return cache_dir
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
| 55 |
repo_path = clone_or_cache(repo_url)
|
|
@@ -61,15 +61,18 @@ def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
|
| 61 |
sim_method=sim_method,
|
| 62 |
)
|
| 63 |
|
| 64 |
-
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
return html, str(json_path)
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
| 73 |
|
| 74 |
CUSTOM_CSS = """
|
| 75 |
#graph_html iframe {height:85vh !important; width:100% !important; border:none;}
|
|
@@ -85,7 +88,7 @@ with gr.Blocks(css=CUSTOM_CSS) as demo:
|
|
| 85 |
sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
|
| 86 |
go_btn = gr.Button("Build graph")
|
| 87 |
|
| 88 |
-
html_out = gr.HTML(elem_id="graph_html", show_label=False)
|
| 89 |
json_out = gr.File(label="Download graph.json")
|
| 90 |
|
| 91 |
go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])
|
|
|
|
| 12 |
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
+
# ── refactored helpers ──
|
|
|
|
|
|
|
| 16 |
from modular_graph_and_candidates import build_graph_json, generate_html
|
| 17 |
|
| 18 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
| 19 |
|
| 20 |
+
# ───────────────────────────── cache repo once per 24 h ───────────────────────────
|
| 21 |
|
| 22 |
def clone_or_cache(repo_url: str) -> Path:
    """Shallow-clone *repo_url* into the temp directory and reuse it for 24 h.

    The checkout lives in ``<tempdir>/repo_<digest>``, where ``<digest>`` is
    derived from the URL. A hidden ``.cloned_at`` file inside it stores the
    ISO-8601 UTC timestamp of the clone. While that stamp is less than one day
    old the existing checkout is returned; otherwise the directory is wiped
    and cloned again.

    Args:
        repo_url: URL (or path) of the git repository to clone.

    Returns:
        Path to the local checkout.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero.
        OSError: if ``git`` cannot be started or the stamp cannot be written.
    """
    import hashlib
    from datetime import timezone

    # The built-in hash() of a str is salted per interpreter process, so it
    # would yield a different directory after every restart and the on-disk
    # cache would never be found again. A content digest is stable.
    digest = hashlib.sha256(repo_url.encode("utf-8")).hexdigest()[:16]
    cache_dir = Path(tempfile.gettempdir()) / f"repo_{digest}"
    stamp = cache_dir / ".cloned_at"

    # No in-memory memoisation here on purpose: it would keep returning the
    # first result forever and the 24 h expiry below would never be evaluated
    # in a long-running process. Reading the stamp file is cheap.
    try:
        cloned_at = datetime.fromisoformat(stamp.read_text().strip())
    except (OSError, ValueError):
        # Missing directory, missing stamp or malformed stamp -> re-clone.
        cloned_at = None

    if cloned_at is not None:
        if cloned_at.tzinfo is None:
            # Stamps written by earlier versions were naive UTC.
            cloned_at = cloned_at.replace(tzinfo=timezone.utc)
        if datetime.now(timezone.utc) - cloned_at < timedelta(days=1):
            return cache_dir

    # Always clear the target first: a stale checkout, or a half-finished
    # clone that never got its stamp, would make ``git clone`` fail.
    shutil.rmtree(cache_dir, ignore_errors=True)

    # "--" ends option parsing so a URL beginning with "-" cannot be
    # interpreted by git as a command-line option.
    subprocess.check_call(
        ["git", "clone", "--depth", "1", "--", repo_url, str(cache_dir)]
    )
    stamp.write_text(datetime.now(timezone.utc).isoformat())
    return cache_dir
|
| 40 |
|
| 41 |
+
# ───────────────────────────── main callback ─────────────────────────────────────
|
| 42 |
+
|
| 43 |
+
def _escape_srcdoc(text: str) -> str:
|
| 44 |
+
"""Escape for inclusion inside an <iframe srcdoc="β¦"> attribute."""
|
| 45 |
+
return (
|
| 46 |
+
text.replace("&", "&")
|
| 47 |
+
.replace("\"", """)
|
| 48 |
+
.replace("'", "'")
|
| 49 |
+
.replace("<", "<")
|
| 50 |
+
.replace(">", ">")
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
|
| 54 |
def run(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
| 55 |
repo_path = clone_or_cache(repo_url)
|
|
|
|
| 61 |
sim_method=sim_method,
|
| 62 |
)
|
| 63 |
|
| 64 |
+
raw_html = generate_html(graph)
|
| 65 |
|
| 66 |
+
iframe_html = (
|
| 67 |
+
f'<iframe style="width:100%;height:85vh;border:none;" '
|
| 68 |
+
f'srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
|
| 69 |
+
)
|
|
|
|
| 70 |
|
| 71 |
+
tmp_json = Path(tempfile.mktemp(suffix=".json"))
|
| 72 |
+
tmp_json.write_text(json.dumps(graph), encoding="utf-8")
|
| 73 |
+
return iframe_html, str(tmp_json)
|
| 74 |
|
| 75 |
+
# ───────────────────────────── UI ────────────────────────────────────────────────
|
| 76 |
|
| 77 |
CUSTOM_CSS = """
|
| 78 |
#graph_html iframe {height:85vh !important; width:100% !important; border:none;}
|
|
|
|
| 88 |
sim_radio = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
|
| 89 |
go_btn = gr.Button("Build graph")
|
| 90 |
|
| 91 |
+
html_out = gr.HTML(elem_id="graph_html", sanitize=False, show_label=False)
|
| 92 |
json_out = gr.File(label="Download graph.json")
|
| 93 |
|
| 94 |
go_btn.click(run, [repo_in, thresh, multi_cb, sim_radio], [html_out, json_out])
|