Spaces:

transformers-community
/

Transformers-tenets

Running

App Files Files Community

Molbap HF Staff committed on Aug 20

Commit

09407c4

1 Parent(s): c073d08

refacto

Browse files

Files changed (1) hide show

app.py +230 -121

app.py CHANGED Viewed

@@ -1,48 +1,76 @@
-import os
-import sys
 import re
-import json
-import time
-import threading
 import subprocess
 from pathlib import Path
 import gradio as gr
 import pandas as pd
 import torch
 import spaces
-# ---------------------------
-# Markdown rendering (Option A)
-# ---------------------------
-def _make_md_markdownit():
-    # Prefer markdown-it-py + mdit-py-plugins if available
     from importlib import import_module
     from markdown_it import MarkdownIt
-    md = MarkdownIt("gfm-like")
-    # Version-agnostic plugin shims
-    foot_mod = import_module("mdit_py_plugins.footnote")
-    foot = getattr(foot_mod, "footnote", None) or getattr(foot_mod, "footnote_plugin")
-    md.use(foot)
-    tl_mod = import_module("mdit_py_plugins.tasklists")
-    tasklists = getattr(tl_mod, "tasklists", None) or getattr(tl_mod, "tasklists_plugin")
-    md.use(tasklists)
-    cont_mod = import_module("mdit_py_plugins.container")
-    container = getattr(cont_mod, "container", None) or getattr(cont_mod, "container_plugin")
     try:
-        md.use(container, "details")
-    except TypeError:
-        md.use(lambda m: container(m, name="details"))
-    return md
-def _make_md_pythonmarkdown():
-    # Fallback: Python-Markdown + PyMdown
-    import markdown as md
-    exts = [
         "extra",            # tables + fenced code
         "footnotes",
         "admonition",
@@ -51,13 +79,20 @@ def _make_md_pythonmarkdown():
         "pymdownx.superfences",
         "pymdownx.tasklist",
     ]
-    ext_cfg = {"pymdownx.tasklist": {"custom_checkbox": True}, "toc": {"permalink": True}}
-    return ("python-markdown", exts, ext_cfg, md)
-try:
-    _md_engine = ("markdown-it", _make_md_markdownit())
-except Exception:
-    _md_engine = _make_md_pythonmarkdown()
 def _obsidian_rewrites(text: str) -> str:
     # 1) Obsidian image embeds: ![[img.png]]  ->  ![](file=content/img.png)
@@ -83,47 +118,45 @@ def _obsidian_rewrites(text: str) -> str:
     return text
-def md_to_html(text: str) -> str:
     text = _obsidian_rewrites(text)
-    if _md_engine[0] == "markdown-it":
-        md = _md_engine[1]
-        return md.render(text)
     else:
-        tag, exts, cfg, md = _md_engine
-        return md.markdown(text, extensions=exts, extension_configs=cfg, output_format="html5")
-def render_article(md_path: str, inserts: dict[str, callable]):
-    raw = Path(md_path).read_text(encoding="utf-8") if Path(md_path).exists() else f"**Missing article**: `{md_path}`."
-    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
-    with gr.Column():
-        for i, part in enumerate(parts):
-            if i % 2 == 0:
-                # Wrap prose in an article container for scoped CSS
-                gr.HTML(f'<div class="article">{md_to_html(part)}</div>')
-            else:
-                (inserts.get(part) or (lambda: gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")))()
-def old_render_article(md_path: str, inserts: dict[str, callable]):
-    raw = ""
-    path = Path(md_path)
-    if path.exists():
-        raw = path.read_text(encoding="utf-8")
     else:
-        raw = f"**Missing article**: `{md_path}` not found.\n\nCreate it in your Space repo."
     # Split on {{TOKEN}} markers (e.g., {{ALLOC_PLOT}})
-    parts = re.split(r"\{\{([A-Z_]+)\}\}", raw)
     with gr.Column():
-        for i, part in enumerate(parts):
-            if i % 2 == 0:
-                gr.HTML(md_to_html(part))
             else:
-                build = inserts.get(part)
-                if build is None:
-                    gr.HTML(f"<p><em>Unknown insert: {part}</em></p>")
                 else:
-                    build()
 # ---------------------------
 # Terminal (safe, simplified)
@@ -187,60 +220,65 @@ def build_attn_vis():
 # Transformers caching allocator warmup (time vs MiB plot)
 # -------------------------------------------------------
-from transformers import AutoModelForCausalLM, modeling_utils as MU  # noqa: E402
 def _measure_load_timeline(model_id: str, disable_warmup: bool):
     """Measure memory usage during model loading with/without cache warmup."""
-    orig = getattr(MU, "caching_allocator_warmup", None)
-    if disable_warmup and orig is not None:
-        MU.caching_allocator_warmup = lambda *a, **k: None  # type: ignore[attr-defined]
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        tl = []
-        def sample(start_t, stop_evt):
-            while not stop_evt.is_set():
                 if device == "cuda":
                     torch.cuda.synchronize()
                     # Use max memory to capture peaks better
-                    alloc = torch.cuda.max_memory_allocated()
                     torch.cuda.reset_peak_memory_stats()
                 else:
-                    alloc = 0
-                tl.append({"t": time.perf_counter() - start_t, "MiB": alloc / (1024**2)})
                 time.sleep(0.02)  # Sample more frequently
         if device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
-            initial_mem = torch.cuda.memory_allocated()
         else:
-            initial_mem = 0
-        start = time.perf_counter()
-        stop_evt = threading.Event()
-        th = threading.Thread(target=sample, args=(start, stop_evt), daemon=True)
-        th.start()
         # Load model with appropriate settings
-        kwargs = {"low_cpu_mem_usage": True}
         if device == "cuda":
-            kwargs.update({
                 "torch_dtype": torch.float16,
                 "device_map": "cuda:0"
             })
-        model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
-        stop_evt.set()
-        th.join()
         # Final memory measurement
         if device == "cuda":
             torch.cuda.synchronize()
-            final_mem = torch.cuda.memory_allocated()
-            tl.append({"t": time.perf_counter() - start, "MiB": final_mem / (1024**2)})
         # Clean up
         del model
@@ -248,43 +286,56 @@ def _measure_load_timeline(model_id: str, disable_warmup: bool):
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
-        return tl
     finally:
-        if orig is not None:
-            MU.caching_allocator_warmup = orig  # restore
 @spaces.GPU(duration=240)
-def profile_warmup(model_id: str):
     if not torch.cuda.is_available():
         # Create dummy data for CPU demo
-        import numpy as np
-        t_points = np.linspace(0, 5, 50)
-        base_mem = np.cumsum(np.random.exponential(50, 50))
-        warmup_on = [{"t": t, "MiB": mem, "mode": "warmup ON"} for t, mem in zip(t_points, base_mem * 0.8)]
-        warmup_off = [{"t": t, "MiB": mem, "mode": "warmup OFF"} for t, mem in zip(t_points, base_mem)]
-        return pd.DataFrame(warmup_on + warmup_off)
     try:
-        on_data = _measure_load_timeline(model_id, disable_warmup=False)
-        off_data = _measure_load_timeline(model_id, disable_warmup=True)
         # Create DataFrame with better labeling
-        rows = [{"t": r["t"], "MiB": r["MiB"], "mode": "🚀 Warmup ON (Optimized)"} for r in on_data] + \
-               [{"t": r["t"], "MiB": r["MiB"], "mode": "📈 Warmup OFF (Standard)"} for r in off_data]
-        df = pd.DataFrame(rows)
-        # Add summary stats if we have data
-        if len(on_data) > 0 and len(off_data) > 0:
-            on_peak = max(r["MiB"] for r in on_data)
-            off_peak = max(r["MiB"] for r in off_data)
-            savings = ((off_peak - on_peak) / off_peak * 100) if off_peak > 0 else 0
-            print(f"Memory savings: {savings:.1f}% (Peak: {on_peak:.0f} MiB vs {off_peak:.0f} MiB)")
-        return df
-    except Exception as e:
-        print(f"Error profiling {model_id}: {e}")
-        # Return empty DataFrame on error
         return pd.DataFrame(columns=["t", "MiB", "mode"])
 def build_alloc_plot():
@@ -317,7 +368,7 @@ def build_alloc_plot():
         )
         gr.Markdown("**Note**: This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.")
-        go.click(profile_warmup, inputs=[model], outputs=plot)
 # ---------------------------
 # Optional FastRTC preview
@@ -335,10 +386,14 @@ def build_fastrtc():
     if not HAS_FASTRTC:
         gr.Markdown("Install `fastrtc` to enable this section.")
         return
     with gr.Group():
         gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
-        rtc = WebRTC(mode="send-receive", modality="video")
-        rtc.stream(ReplyOnPause(_echo_video), inputs=[rtc], outputs=[rtc], time_limit=60)
 # ---------------------------
 # Image display functions
@@ -547,6 +602,60 @@ hr { border: 0; border-top: 1px solid var(--border-color); margin: 2rem 0; }
   margin-bottom: 0.5rem !important;
 }
 """
 with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo:

+# Standard library imports
 import re
 import subprocess
+import threading
+import time
 from pathlib import Path
+# Third-party imports
 import gradio as gr
+import numpy as np
 import pandas as pd
 import torch
 import spaces
+from transformers import AutoModelForCausalLM
+from transformers import modeling_utils as transformers_modeling
+# Optional imports for markdown processing
+try:
     from importlib import import_module
     from markdown_it import MarkdownIt
+    HAS_MARKDOWN_IT = True
+except ImportError:
+    HAS_MARKDOWN_IT = False
+try:
+    import markdown
+    HAS_PYTHON_MARKDOWN = True
+except ImportError:
+    HAS_PYTHON_MARKDOWN = False
+try:
+    from fastrtc import WebRTC, ReplyOnPause
+    HAS_FASTRTC = True
+except ImportError:
+    HAS_FASTRTC = False
+# ---------------------------
+# Markdown rendering (Option A)
+# ---------------------------
+def _create_markdownit_renderer():
+    """Create markdown-it renderer with plugins if available."""
+    if not HAS_MARKDOWN_IT:
+        return None
     try:
+        markdown_parser = MarkdownIt("gfm-like")
+        # Version-agnostic plugin loading
+        footnote_module = import_module("mdit_py_plugins.footnote")
+        footnote_plugin = getattr(footnote_module, "footnote", None) or getattr(footnote_module, "footnote_plugin")
+        markdown_parser.use(footnote_plugin)
+        tasklist_module = import_module("mdit_py_plugins.tasklists")
+        tasklist_plugin = getattr(tasklist_module, "tasklists", None) or getattr(tasklist_module, "tasklists_plugin")
+        markdown_parser.use(tasklist_plugin)
+        container_module = import_module("mdit_py_plugins.container")
+        container_plugin = getattr(container_module, "container", None) or getattr(container_module, "container_plugin")
+        try:
+            markdown_parser.use(container_plugin, "details")
+        except TypeError:
+            markdown_parser.use(lambda m: container_plugin(m, name="details"))
+        return markdown_parser
+    except Exception:
+        return None
+def _create_python_markdown_config():
+    """Create Python-Markdown configuration as fallback."""
+    if not HAS_PYTHON_MARKDOWN:
+        return None
+    extensions = [
         "extra",            # tables + fenced code
         "footnotes",
         "admonition",
         "pymdownx.superfences",
         "pymdownx.tasklist",
     ]
+    extension_config = {
+        "pymdownx.tasklist": {"custom_checkbox": True},
+        "toc": {"permalink": True}
+    }
+    return ("python-markdown", extensions, extension_config, markdown)
+# Initialize markdown engine
+markdown_renderer = _create_markdownit_renderer()
+if markdown_renderer:
+    markdown_engine = ("markdown-it", markdown_renderer)
+else:
+    markdown_engine = _create_python_markdown_config()
+    if not markdown_engine:
+        raise ImportError("No markdown processor available")
 def _obsidian_rewrites(text: str) -> str:
     # 1) Obsidian image embeds: ![[img.png]]  ->  ![](file=content/img.png)
     return text
+def markdown_to_html(text: str) -> str:
+    """Convert markdown text to HTML using the configured renderer."""
     text = _obsidian_rewrites(text)
+    if markdown_engine[0] == "markdown-it":
+        renderer = markdown_engine[1]
+        return renderer.render(text)
     else:
+        engine_type, extensions, extension_config, markdown_module = markdown_engine
+        return markdown_module.markdown(
+            text,
+            extensions=extensions,
+            extension_configs=extension_config,
+            output_format="html5"
+        )
+def render_article(article_path: str, component_inserts: dict[str, callable]):
+    """Render article from markdown with embedded interactive components."""
+    if Path(article_path).exists():
+        raw_content = Path(article_path).read_text(encoding="utf-8")
     else:
+        raw_content = f"**Missing article**: `{article_path}` not found."
     # Split on {{TOKEN}} markers (e.g., {{ALLOC_PLOT}})
+    content_parts = re.split(r"\{\{([A-Z_]+)\}\}", raw_content)
     with gr.Column():
+        for index, part in enumerate(content_parts):
+            if index % 2 == 0:
+                # Render markdown content wrapped in article container
+                html_content = markdown_to_html(part)
+                gr.HTML(f'<div class="article">{html_content}</div>')
             else:
+                # Render interactive component or show error
+                component_builder = component_inserts.get(part)
+                if component_builder is None:
+                    gr.HTML(f"<p><em>Unknown component: {part}</em></p>")
                 else:
+                    component_builder()
 # ---------------------------
 # Terminal (safe, simplified)
 # Transformers caching allocator warmup (time vs MiB plot)
 # -------------------------------------------------------
 def _measure_load_timeline(model_id: str, disable_warmup: bool):
     """Measure memory usage during model loading with/without cache warmup."""
+    original_warmup_func = getattr(transformers_modeling, "caching_allocator_warmup", None)
+    if disable_warmup and original_warmup_func is not None:
+        transformers_modeling.caching_allocator_warmup = lambda *args, **kwargs: None
     try:
         device = "cuda" if torch.cuda.is_available() else "cpu"
+        timeline_data = []
+        def sample_memory(start_time, stop_event):
+            while not stop_event.is_set():
                 if device == "cuda":
                     torch.cuda.synchronize()
                     # Use max memory to capture peaks better
+                    allocated_memory = torch.cuda.max_memory_allocated()
                     torch.cuda.reset_peak_memory_stats()
                 else:
+                    allocated_memory = 0
+                timeline_data.append({
+                    "t": time.perf_counter() - start_time,
+                    "MiB": allocated_memory / (1024**2)
+                })
                 time.sleep(0.02)  # Sample more frequently
         if device == "cuda":
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
+            initial_memory = torch.cuda.memory_allocated()
         else:
+            initial_memory = 0
+        start_time = time.perf_counter()
+        stop_event = threading.Event()
+        memory_thread = threading.Thread(target=sample_memory, args=(start_time, stop_event), daemon=True)
+        memory_thread.start()
         # Load model with appropriate settings
+        model_kwargs = {"low_cpu_mem_usage": True}
         if device == "cuda":
+            model_kwargs.update({
                 "torch_dtype": torch.float16,
                 "device_map": "cuda:0"
             })
+        model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
+        stop_event.set()
+        memory_thread.join()
         # Final memory measurement
         if device == "cuda":
             torch.cuda.synchronize()
+            final_memory = torch.cuda.memory_allocated()
+            timeline_data.append({
+                "t": time.perf_counter() - start_time,
+                "MiB": final_memory / (1024**2)
+            })
         # Clean up
         del model
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
+        return timeline_data
     finally:
+        if original_warmup_func is not None:
+            transformers_modeling.caching_allocator_warmup = original_warmup_func
 @spaces.GPU(duration=240)
+def profile_warmup_comparison(model_id: str):
+    """Profile memory usage with and without cache warmup."""
     if not torch.cuda.is_available():
         # Create dummy data for CPU demo
+        time_points = np.linspace(0, 5, 50)
+        base_memory = np.cumsum(np.random.exponential(50, 50))
+        warmup_enabled_data = [
+            {"t": t, "MiB": mem, "mode": "🚀 Warmup ON (Optimized)"}
+            for t, mem in zip(time_points, base_memory * 0.8)
+        ]
+        warmup_disabled_data = [
+            {"t": t, "MiB": mem, "mode": "📈 Warmup OFF (Standard)"}
+            for t, mem in zip(time_points, base_memory)
+        ]
+        return pd.DataFrame(warmup_enabled_data + warmup_disabled_data)
     try:
+        warmup_enabled_timeline = _measure_load_timeline(model_id, disable_warmup=False)
+        warmup_disabled_timeline = _measure_load_timeline(model_id, disable_warmup=True)
         # Create DataFrame with better labeling
+        all_data = []
+        all_data.extend([
+            {"t": entry["t"], "MiB": entry["MiB"], "mode": "🚀 Warmup ON (Optimized)"}
+            for entry in warmup_enabled_timeline
+        ])
+        all_data.extend([
+            {"t": entry["t"], "MiB": entry["MiB"], "mode": "📈 Warmup OFF (Standard)"}
+            for entry in warmup_disabled_timeline
+        ])
+        result_dataframe = pd.DataFrame(all_data)
+        # Calculate and log memory savings
+        if warmup_enabled_timeline and warmup_disabled_timeline:
+            peak_with_warmup = max(entry["MiB"] for entry in warmup_enabled_timeline)
+            peak_without_warmup = max(entry["MiB"] for entry in warmup_disabled_timeline)
+            if peak_without_warmup > 0:
+                savings_percent = ((peak_without_warmup - peak_with_warmup) / peak_without_warmup * 100)
+                print(f"Memory savings: {savings_percent:.1f}% (Peak: {peak_with_warmup:.0f} MiB vs {peak_without_warmup:.0f} MiB)")
+        return result_dataframe
+    except Exception as error:
+        print(f"Error profiling {model_id}: {error}")
         return pd.DataFrame(columns=["t", "MiB", "mode"])
 def build_alloc_plot():
         )
         gr.Markdown("**Note**: This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.")
+        go.click(profile_warmup_comparison, inputs=[model], outputs=plot)
 # ---------------------------
 # Optional FastRTC preview
     if not HAS_FASTRTC:
         gr.Markdown("Install `fastrtc` to enable this section.")
         return
+    def echo_video_frame(frame):
+        yield frame
     with gr.Group():
         gr.Markdown("Camera loopback using FastRTC WebRTC. Extend with streaming handlers later.")
+        webrtc_component = WebRTC(mode="send-receive", modality="video")
+        webrtc_component.stream(ReplyOnPause(echo_video_frame), inputs=[webrtc_component], outputs=[webrtc_component], time_limit=60)
 # ---------------------------
 # Image display functions
   margin-bottom: 0.5rem !important;
 }
+/* Fix contrast for all interactive components */
+.gr-form, .gr-panel, .gr-block {
+  background: #ffffff !important;
+  border: 1px solid var(--border-color) !important;
+  border-radius: 8px !important;
+}
+/* Fix text inputs */
+.gr-textbox input {
+  background: #ffffff !important;
+  color: #1f2937 !important;
+  border: 1px solid var(--border-color) !important;
+  font-weight: 500 !important;
+}
+/* Fix all labels */
+.gr-form label, .gr-panel label, .gr-block label {
+  color: #374151 !important;
+  font-weight: 600 !important;
+}
+/* Fix info text */
+.gr-form .gr-info, .gr-panel .gr-info {
+  color: #6b7280 !important;
+  font-weight: 500 !important;
+}
+/* Fix plot styling */
+.gr-plot {
+  border: 1px solid var(--border-color) !important;
+  border-radius: 8px !important;
+  background: #ffffff !important;
+}
+/* Fix any remaining low contrast text */
+.gradio-container * {
+  color: inherit !important;
+}
+/* Ensure all text in components has good contrast */
+.gr-form *, .gr-panel *, .gr-block * {
+  color: #1f2937 !important;
+}
+/* Fix markdown in components */
+.gr-markdown {
+  color: #1f2937 !important;
+}
+.gr-markdown h1, .gr-markdown h2, .gr-markdown h3, .gr-markdown h4 {
+  color: #111827 !important;
+  font-weight: 600 !important;
+}
 """
 with gr.Blocks(css=CSS, fill_height=True, title="Interactive Blog — Transformers Feature Showcase") as demo: