| """ModelBrew AI β Zero Forgetting Benchmark Results Dashboard""" |
| import gradio as gr |
| import plotly.graph_objects as go |
|
|
| |
# Chart palette (hex colors) shared by every figure below.
BLUE = "#1F4E79"   # primary series (generation scores, gradient norms)
GREEN = "#4CAF50"  # ModelBrew / retention series
RED = "#E53935"    # naive-LoRA / failure series
GOLD = "#F9A825"   # not referenced by any chart in this file
GRAY = "#757575"   # not referenced by any chart in this file



# Benchmark 1 (multi-seed research, 5 domains on Mistral-7B):
# NAIVE_FORGET is % of prior-domain knowledge lost by naive LoRA per seed;
# MODELBREW_DRIFT is ModelBrew's drift per seed (negative = old domains improved).
SEEDS = ["Seed 0", "Seed 42", "Seed 1234"]
NAIVE_FORGET = [38.1, 41.7, 49.0]
MODELBREW_DRIFT = [-0.03, -0.10, -0.37]


# Benchmark 2 (Walmart, 4 sequential domains). Retention is None for the
# first domain — there is nothing earlier to retain. BERTScore values.
WALMART_DOMAINS = ["Customer Service", "Product Knowledge", "HR Policy", "Financial Analytics"]
WALMART_GEN = [0.92, 0.94, 0.88, 0.83]
WALMART_RET = [None, 0.83, 0.86, 0.82]


# Benchmark 3 (Salesforce, 5 sequential domains). Same None sentinel for the
# first domain's retention. SF_GN: peak gradient norm per domain. SF_LOSS:
# final training loss per domain (shown in the Details tab table, not plotted).
SF_DOMAINS = ["CRM Ops", "Sales Ops", "Reporting", "Support", "Admin & Dev"]
SF_GEN = [0.882, 0.897, 0.890, 0.885, 0.897]
SF_RET = [None, 0.889, 0.891, 0.897, 0.907]
SF_GN = [3.68, 2.15, 3.16, 2.53, 2.11]
SF_LOSS = [1.33, 1.05, 1.24, 0.96, 0.66]


# Benchmark 4 (dental stress test, 8 sequential domains): peak gradient
# norms per domain for ModelBrew vs naive LoRA.
DENTAL_DOMAINS = [f"Domain {i+1}" for i in range(8)]
DENTAL_MB_GN = [3.8, 4.2, 5.1, 4.5, 5.5, 4.8, 6.1, 5.2]
DENTAL_NAIVE_GN = [4.8, 5.6, 6.3, 6.9, 7.2, 8.1, 8.8, 9.4]
|
|
|
|
def make_seed_chart():
    """Build the Benchmark 1 grouped bar chart (per-seed forgetting vs drift).

    ModelBrew drift values are negative (old domains improved), so bar
    heights use the absolute value while the labels keep the signed number.

    Returns:
        plotly.graph_objects.Figure
    """
    forgetting_bar = go.Bar(
        name="Naive LoRA (forgetting)",
        x=SEEDS,
        y=NAIVE_FORGET,
        marker_color=RED,
        text=[f"+{pct}%" for pct in NAIVE_FORGET],
        textposition="outside",
    )
    drift_bar = go.Bar(
        name="ModelBrew (drift)",
        x=SEEDS,
        y=[abs(pct) for pct in MODELBREW_DRIFT],
        marker_color=GREEN,
        text=[f"{pct}%" for pct in MODELBREW_DRIFT],
        textposition="outside",
    )
    fig = go.Figure(data=[forgetting_bar, drift_bar])
    fig.update_layout(
        title="Benchmark 1: Multi-Seed Research β 5 Domains on Mistral-7B",
        yaxis_title="Knowledge Lost (%)",
        barmode="group",
        template="plotly_white",
        height=450,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig
|
|
|
|
def make_walmart_chart():
    """Build the Benchmark 2 grouped bar chart (Walmart gen vs retention).

    WALMART_RET uses None as a "no retention score" sentinel for the first
    domain. Missing entries are drawn as a zero-height bar labelled with a
    placeholder glyph.

    Returns:
        plotly.graph_objects.Figure
    """
    fig = go.Figure()
    fig.add_trace(go.Bar(
        name="Gen BERTScore",
        x=WALMART_DOMAINS, y=WALMART_GEN,
        marker_color=BLUE,
        text=[f"{v:.2f}" for v in WALMART_GEN],
        textposition="outside",
    ))
    # BUG FIX: compare against None explicitly — the previous truthiness
    # check (`v if v else 0`) would also treat a legitimate 0.0 score as
    # "missing" and hide its label.
    ret_vals = [v if v is not None else 0 for v in WALMART_RET]
    ret_text = [f"{v:.2f}" if v is not None else "β" for v in WALMART_RET]
    fig.add_trace(go.Bar(
        name="Retention BERTScore",
        x=WALMART_DOMAINS, y=ret_vals,
        marker_color=GREEN,
        text=ret_text,
        textposition="outside",
    ))
    fig.update_layout(
        title="Benchmark 2: Walmart Enterprise β 4 Domains on Mistral-7B",
        yaxis_title="BERTScore",
        yaxis_range=[0.5, 1.0],
        barmode="group",
        template="plotly_white",
        height=450,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig
|
|
|
|
def make_salesforce_chart():
    """Build the Benchmark 3 line chart (Salesforce gen vs retention scores).

    SF_RET uses None for the first domain (nothing earlier to retain); the
    retention trace only covers domains that have a score.

    Returns:
        plotly.graph_objects.Figure
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        name="Gen BERTScore",
        x=SF_DOMAINS, y=SF_GEN,
        mode="lines+markers+text",
        marker=dict(size=10, color=BLUE),
        text=[f"{v:.3f}" for v in SF_GEN],
        textposition="top center",
    ))
    # BUG FIX: pair domains with retention scores and drop None entries
    # together. The previous code hard-coded x = SF_DOMAINS[1:] while
    # filtering y on truthiness, so a missing (or legitimately 0.0) score
    # anywhere but position 0 would desynchronize x and y.
    ret_pairs = [(d, v) for d, v in zip(SF_DOMAINS, SF_RET) if v is not None]
    ret_x = [d for d, _ in ret_pairs]
    ret_y = [v for _, v in ret_pairs]
    fig.add_trace(go.Scatter(
        name="Retention BERTScore",
        x=ret_x, y=ret_y,
        mode="lines+markers+text",
        marker=dict(size=10, color=GREEN),
        text=[f"{v:.3f}" for v in ret_y],
        textposition="bottom center",
        line=dict(dash="dot"),
    ))
    fig.update_layout(
        title="Benchmark 3: Salesforce Enterprise β 5 Domains, Positive Backward Transfer",
        yaxis_title="BERTScore",
        yaxis_range=[0.85, 0.92],
        template="plotly_white",
        height=450,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig
|
|
|
|
def make_salesforce_gn_chart():
    """Build the Salesforce gradient-stability bar chart.

    Plots the peak gradient norm per domain with a dashed reference line at
    263 — the norm where naive LoRA crashed.

    Returns:
        plotly.graph_objects.Figure
    """
    gn_bar = go.Bar(
        name="Peak Gradient Norm",
        x=SF_DOMAINS,
        y=SF_GN,
        marker_color=BLUE,
        text=[f"{norm:.2f}" for norm in SF_GN],
        textposition="outside",
    )
    fig = go.Figure(data=[gn_bar])
    fig.add_hline(
        y=263,
        line_dash="dash",
        line_color=RED,
        annotation_text="Naive LoRA: 263 (crashed)",
        annotation_position="top left",
    )
    fig.update_layout(
        title="Salesforce β Gradient Stability (Naive LoRA crashed at 263)",
        yaxis_title="Peak Gradient Norm",
        template="plotly_white",
        height=400,
    )
    return fig
|
|
|
|
def make_dental_chart():
    """Build the Benchmark 4 line chart (dental stress test gradient norms).

    Compares per-domain peak gradient norms for naive LoRA vs ModelBrew
    across 8 sequential domains.

    Returns:
        plotly.graph_objects.Figure
    """
    fig = go.Figure()
    # Trace order matters for legend/stacking order: naive first, then ours.
    series = (
        ("Naive LoRA", DENTAL_NAIVE_GN, RED),
        ("ModelBrew", DENTAL_MB_GN, GREEN),
    )
    for label, norms, color in series:
        fig.add_trace(go.Scatter(
            name=label,
            x=DENTAL_DOMAINS,
            y=norms,
            mode="lines+markers",
            marker=dict(size=8, color=color),
            line=dict(width=2),
        ))
    fig.update_layout(
        title="Benchmark 4: Dental Stress Test β 8 Sequential Domains",
        yaxis_title="Peak Gradient Norm",
        template="plotly_white",
        height=450,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig
|
|
|
|
def make_summary_chart():
    """Build the cross-benchmark summary bar chart for the Overview tab.

    Only the research benchmark has a measured naive-LoRA forgetting number
    (the red series has a single bar); the green ModelBrew series covers all
    four experiments.

    Returns:
        plotly.graph_objects.Figure
    """
    experiments = ["Research\n(5 domains)", "Walmart\n(4 domains)", "Salesforce\n(5 domains)", "Dental\n(8 domains)"]
    # CLEANUP: removed dead locals `naive`/`ours` — they were built and then
    # never used (the literals below were repeated instead).
    fig = go.Figure()
    fig.add_trace(go.Bar(
        name="Naive LoRA Forgetting",
        x=experiments[:1], y=[43.0],
        marker_color=RED,
        text=["+43.0%"],
        textposition="outside",
        width=0.3,
    ))
    fig.add_trace(go.Bar(
        name="ModelBrew Drift",
        x=experiments, y=[0.17, 0, 0, 0],
        marker_color=GREEN,
        text=["-0.17%", "Zero", "Zero\n(positive transfer)", "Zero"],
        textposition="outside",
        width=0.3,
    ))
    fig.update_layout(
        title="Zero Forgetting Across All 4 Benchmarks",
        yaxis_title="Knowledge Lost (%)",
        barmode="group",
        template="plotly_white",
        height=450,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig
|
|
|
|
# Markdown copy for the "Overview" tab: pitch, cross-benchmark summary
# table, and external links.
# NOTE(review): the stray "β" glyphs throughout this copy look like
# mojibake for "—"/"→" — confirm the intended characters with the author
# before editing; left byte-identical here.
OVERVIEW_MD = """
# Zero Forgetting in LLM Fine-Tuning

**Every fine-tuning run destroys what the model already knew.** Train on medical, then legal β medical is gone.

ModelBrew is a continual learning adapter (~0.1% additional parameters) that solves catastrophic forgetting. Train one model on domain after domain β **it keeps everything.**

---

### 4 Benchmarks on Mistral-7B. Zero Forgetting. Every Single One.

| Benchmark | Domains | Seeds | Result |
|-----------|---------|-------|--------|
| **Research** | 5 (Medical β Legal β Financial β Code β Science) | 3 | **-0.17% drift** vs +43% naive forgetting |
| **Walmart** | 4 (Customer Service β Product β HR β Finance) | 1 | **BERTScores 0.82β0.94** across all domains |
| **Salesforce** | 5 (CRM β Sales β Reporting β Support β Admin) | 1 | **Positive backward transfer** (0.889 β 0.907) |
| **Dental** | 8 sequential domains | 2 | **Gradient norms stable**, zero explosions |

- Spectral norm locked at **1.0** across every experiment
- Naive LoRA crashed at step 43 with gradient norm **263**. Ours: peak under **6**
- No replay buffers. No EWC. No knowledge distillation. No retraining from scratch.

---

### What This Means

Right now every AI team in the world throws away learned knowledge every time they fine-tune. That's billions of dollars in wasted compute and a fundamental barrier to AI that actually builds on what it knows over time.

- A hospital trains one model across radiology, pathology, cardiology β it keeps learning, never forgets
- A legal AI learns new case law without losing old precedent
- Models in developing countries accumulate knowledge across languages and domains on limited hardware

---

### What's Shipped

- **Live product** β processing real training runs today
- **196 automated tests** β CI pipeline on GitHub Actions
- **US patent pending** β provisional filed February 2026
- **7 technical reports** β from 50+ failed experiments to the working method
- **Free tier** β try it right now, no credit card needed

Google published Nested Learning at NeurIPS 2025. Meta has Sparse Memory Finetuning. Neither is available to use. **This is.**

---

**[Try it live](https://mhc-finetune-saas-zrtokzlkbnue9zsk7jfgad.streamlit.app)** | **[API](https://fourwheels2512--crma-finetune-fastapi-app.modal.run/docs)** | **Patent Pending (US Provisional, Feb 2026)**

*Kiran Nayudu β ModelBrew AI β fourwheels2512@gmail.com*
"""
|
|
| |
# Dashboard layout: one tab per benchmark plus an overview and a details
# table. Charts are passed as callables so Gradio evaluates them on load.
with gr.Blocks(
    title="ModelBrew AI β Zero Forgetting Benchmarks",
    # BUG FIX: `theme` is a gr.Blocks constructor argument. It was previously
    # passed to demo.launch(), which has no `theme` parameter and raises
    # TypeError on current Gradio releases.
    theme=gr.themes.Soft(primary_hue="blue"),
) as demo:

    gr.Markdown("# ModelBrew AI β Zero Forgetting Benchmark Results")
    gr.Markdown("*4 independent benchmarks on Mistral-7B. Zero catastrophic forgetting across all of them.*")

    with gr.Tabs():
        with gr.Tab("Overview"):
            gr.Markdown(OVERVIEW_MD)
            gr.Plot(make_summary_chart)

        with gr.Tab("Research Benchmark"):
            gr.Markdown("""
### Multi-Seed Research β 5 Domains, 3 Seeds
Medical β Legal β Financial β Code β Science on Mistral-7B.
Repeated across 3 random seeds to confirm reproducibility.

Naive LoRA destroyed **38β49%** of prior knowledge with every new domain.
ModelBrew drifted less than **0.4%**. The negative sign means the model actually *improved* on old domains.

Naive LoRA **crashed at step 43** with gradient norm 263.
ModelBrew completed every run with peak gradient norm under 6. Spectral norm locked at 1.0.
""")
            gr.Plot(make_seed_chart)

        with gr.Tab("Walmart Enterprise"):
            gr.Markdown("""
### Walmart Enterprise β 4 Domains
Customer Service β Product Knowledge β HR Policy β Financial Analytics.

One model. Four enterprise domains. All retained.
The final model answers questions across all four with **BERTScores of 0.82β0.94**.
""")
            gr.Plot(make_walmart_chart)

        with gr.Tab("Salesforce Enterprise"):
            gr.Markdown("""
### Salesforce Enterprise β 5 Domains, Cumulative Adapter
CRM Operations β Sales Ops β Reporting & Analytics β Customer Support β Admin & Dev.

Retention BERTScores went **UP** with each new domain β 0.889 β 0.891 β 0.897 β 0.907.
The model gets *better* at old domains as it learns new ones. **Positive backward transfer.**

Peak gradient norms stayed between **2.1 and 3.7**. Zero gradient explosions.
""")
            gr.Plot(make_salesforce_chart)
            gr.Plot(make_salesforce_gn_chart)

        with gr.Tab("Dental Stress Test"):
            gr.Markdown("""
### Dental Stress Test β 8 Sequential Domains, 2 Seeds
The longest chain we've tested. Eight sequential domains on Mistral-7B.

Peak gradient norms stayed between **3.8 and 6.1** across all 8 domains.
Naive LoRA gradient norms grew monotonically to **9.4**.
Spectral norm: **1.0** throughout. Zero crashes. Zero NaN losses.
""")
            gr.Plot(make_dental_chart)

        with gr.Tab("Salesforce Details"):
            gr.Markdown("""
### Salesforce β Full Per-Domain Breakdown

| Domain | Training Loss | Gen BERTScore | Retention BERTScore | Peak Grad Norm |
|--------|:---:|:---:|:---:|:---:|
| 1. CRM Operations | 1.33 | 0.882 | β | 3.68 |
| 2. Sales Ops | 1.05 | 0.897 | 0.889 | 2.15 |
| 3. Reporting & Analytics | 1.24 | 0.890 | 0.891 | 3.16 |
| 4. Customer Support | 0.96 | 0.885 | 0.897 | 2.53 |
| 5. Admin & Dev | 0.66 | 0.897 | 0.907 | 2.11 |

**Key findings:**
- Retention BERTScores *improved* as domains accumulated β evidence of positive backward transfer
- Training loss decreased across domains (1.33 β 0.66) β the model learns faster with more accumulated knowledge
- Peak gradient norms stayed between 2.1β3.7 β zero gradient explosions, zero NaN losses
- Final adapter answers questions from all 5 Salesforce domains
""")

    gr.Markdown("---")
    gr.Markdown(
        "*ModelBrew AI β Patent Pending (US Provisional, Feb 2026) β "
        "[Try it live](https://mhc-finetune-saas-zrtokzlkbnue9zsk7jfgad.streamlit.app) β "
        "fourwheels2512@gmail.com*"
    )


# Guard the launch so the module can be imported (e.g. by a hosting runner
# that only needs `demo`) without starting a server as a side effect.
if __name__ == "__main__":
    demo.launch()
|