Shanmuk4622
/

EDEN-Core-Scripts

+"""
+Project EDEN - Hugging Face Upload Master Script
+Applies all 6 refinements:
+  1. Hardware transparency (1080 Ti / Xeon W-2125)
+  2. E2AM Phase mapping per model
+  3. Phase 1 Zero-Overhead Initialization highlight
+  4. Standardized Green Delta table in every README
+  5. YAML tags with co2_eq_emissions + dataset_size
+  6. Citation section in Main Repo
+"""
+import os
+import json
+import glob
+import math
+from huggingface_hub import HfApi, create_repo, upload_file
# ─── CONFIG ──────────────────────────────────────────────────────────────────
# An unset or empty HF_TOKEN is normalised to None instead of "". A string
# token is passed through to the Hub as-is (an empty one cannot authenticate),
# whereas None lets huggingface_hub fall back to the locally saved login token.
HF_TOKEN   = os.environ.get("HF_TOKEN") or None
HF_USER    = "Shanmuk4622"          # HF username (no org found, uploading under user)
HF_ORG     = HF_USER               # use user namespace
# Directory containing this script; all inputs/outputs are resolved against it.
BASE_DIR   = os.path.dirname(os.path.abspath(__file__))
DRY_RUN    = False  # Live upload
api = HfApi(token=HF_TOKEN)
# ─── HARDWARE PROFILE ────────────────────────────────────────────────────────
# Specification strings rendered verbatim in the "Profiling Environment" table
# of every generated README.
HARDWARE = dict(
    gpu="NVIDIA GeForce GTX 1080 Ti (11 GB VRAM, 250 W TDP)",
    cpu="Intel Xeon W-2125 (4 cores / 8 threads @ 4.00 GHz)",
    ram="63.66 GB System RAM",
    os="Windows 10",
)
# ─── E2AM PHASE MAP ──────────────────────────────────────────────────────────
# Top-level results folder -> technique label shown as "Applied Technique".
PHASE_MAP = dict(
    test1="Phase 2 – Progressive Unfreezing + AMP (E2AM SOTA)",
    test2="Baseline – Standard Full Training (Reference Study)",
    test3="Phase 2 – EDEN Classic Energy-Aware Sparse Training",
)
# Long-form Markdown description of the technique applied in each results
# folder; inserted under "E2AM Algorithm — Applied Phases" in the model README.
PHASE_DETAIL = {
    "test1": (
        "**Phase 1 – Zero-Overhead Initialization:** Dataset pre-loaded into pinned "
        "System RAM to eliminate disk I/O power spikes.\n\n"
        "**Phase 2 – Progressive Unfreezing:** Backbone frozen for the first "
        "`E_unfreeze` epochs (only the classification head trains). At `E_unfreeze`, "
        "all layers are unfrozen and the learning rate is decayed. "
        "Gradient accumulation over N micro-batches simulates large batch sizes "
        "without proportional VRAM cost, slashing power-draw spikes.\n\n"
        # PyTorch's AMP stands for "Automatic Mixed Precision".
        "**AMP (Automatic Mixed Precision):** `torch.cuda.amp.autocast()` halves "
        "GPU memory bandwidth, reducing energy per backward pass.\n\n"
        "**Sparse Regularisation:** L1 penalty `λ·Σ|W|` applied to trainable "
        "weights, driving dead neurons to zero and enabling future pruning."
    ),
    "test2": (
        "Standard full fine-tuning used as the **Brute-Force Baseline** for "
        "energy comparison. All layers trained from epoch 1 with a fixed learning "
        "rate and no gradient accumulation. Included for transparent EAG benchmarking."
    ),
    "test3": (
        "**Phase 1 – Zero-Overhead Initialization:** Dataset cached in System RAM.\n\n"
        "**Phase 2 – EDEN Classic:** Energy-aware training loop on classic CNN "
        "architectures. Applies the same EAG early-exit criterion "
        "(`EAG < γ_EAG` for 3 consecutive epochs → terminate), L1 sparsity "
        "penalty, and AMP to architectures like ResNet, VGG, AlexNet, DenseNet, "
        "InceptionV3, and UNet."
    ),
}
# ─── DATASET META ────────────────────────────────────────────────────────────
# Dataset label (as returned by parse_name) -> human-readable size text and the
# identifier written into the README's `datasets:` front-matter entry.
DATASET_META = {
    "CIFAR-10":           {"size": "60,000 images – 10 classes (32×32 px)", "hf_name": "cifar10"},
    "CIFAR-100":          {"size": "60,000 images – 100 classes (32×32 px)", "hf_name": "cifar100"},
    "Custom-ImageNet300": {"size": "~450,000 images – 300 classes (224 px)", "hf_name": "imagenet"},
    "unknown":            {"size": "N/A", "hf_name": "unknown"},
}
# CO2: 0.475 kg CO2e per kWh (global average grid factor).
# The previous value (0.000000475) matched neither unit: per kWh the factor is
# 0.475, and per Joule it is 0.475 / 3.6e6 ≈ 1.32e-7.
KG_CO2_PER_KWH   = 0.475
KG_CO2_PER_JOULE = KG_CO2_PER_KWH / 3_600_000   # 1 kWh = 3.6e6 J
+# ─── HELPERS ─────────────────────────────────────────────────────────────────
def parse_name(filename):
    """Infer ``(architecture, dataset)`` labels from a file name or path.

    Matching is a case-insensitive substring search over the whole string
    (backslashes are normalised to forward slashes first). For each label the
    first rule that matches wins; ``"unknown"`` is returned when none does.

    Args:
        filename: File name or relative path to inspect.

    Returns:
        Tuple ``(arch, dataset)`` of display labels.
    """
    haystack = filename.lower().replace("\\", "/")

    # Order matters: "cifar100" has to be tried before its prefix "cifar10".
    dataset_rules = (
        ("cifar100", "CIFAR-100"),
        ("cifar10", "CIFAR-10"),
        ("imagenet", "Custom-ImageNet300"),
    )
    # Same precedence as the original if/elif chain.
    arch_rules = (
        ("efficientnet", "EfficientNetV2"),
        ("convnext", "ConvNeXtV2"),
        ("mobilevit", "MobileViTv3"),
        ("resnet50", "ResNet50"),
        ("resnet18", "ResNet18"),
        ("vgg16", "VGG16"),
        ("alexnet", "AlexNet"),
        ("inception", "InceptionV3"),
        ("densenet", "DenseNet121"),
        ("unet", "UNet"),
    )

    def first_label(rules):
        for needle, label in rules:
            if needle in haystack:
                return label
        return "unknown"

    return first_label(arch_rules), first_label(dataset_rules)
def joules_to_co2(joules, kg_co2_per_kwh=0.475):
    """Convert an energy amount in Joules to kilograms of CO2-equivalent.

    Args:
        joules: Energy consumed, in Joules.
        kg_co2_per_kwh: Grid emission factor in kg CO2e per kWh. Defaults to
            0.475, the global-average factor used throughout this script.

    Returns:
        Estimated emissions in kg CO2e.
    """
    kwh = joules / 3_600_000  # 1 kWh = 3.6e6 J
    return kwh * kg_co2_per_kwh
def folder_to_phase_label(folder):
    """Return the short phase label for a results folder.

    Folders without a known label are returned unchanged.
    """
    labels = {
        "test1": "SOTA Optimized",
        "test2": "Baseline",
        "test3": "EDEN Classic",
    }
    try:
        return labels[folder]
    except KeyError:
        return folder
# ─── LOAD STATS ──────────────────────────────────────────────────────────────
# results_summary.json is iterated as a sequence of records; the code below
# reads each record's "file", "folder" and "energy" keys.
# The encoding is explicit so the read does not depend on the platform locale
# (the README files later in this script are written as UTF-8 as well).
with open(os.path.join(BASE_DIR, "results_summary.json"), encoding="utf-8") as f:
    results = json.load(f)
# One record per "<folder>_<arch>_<dataset>" key. The first record seen is
# kept, unless it has zero energy and a later duplicate has a real (> 0) value.
stats_map = {}
for r in results:
    arch, dataset = parse_name(r["file"])
    folder = r["folder"]
    key = f"{folder}_{arch}_{dataset}"
    if key not in stats_map or (r["energy"] > 0 and stats_map[key]["energy"] == 0):
        stats_map[key] = r
# Build baseline map: dataset label -> reference record from the "test2"
# (baseline) folder. The first test2 record seen for a dataset is used unless a
# ResNet50 record exists, in which case ResNet50 takes precedence.
baselines = {}
for v in stats_map.values():
    # Read the folder from the record itself rather than re-splitting the
    # composite key, which would misparse folder names containing "_".
    if v["folder"] != "test2":
        continue
    arch, ds = parse_name(v["file"])
    if ds not in baselines or arch == "ResNet50":
        baselines[ds] = v
# ─── COLLECT ALL MODELS ──────────────────────────────────────────────────────
# Every *.pth checkpoint under BASE_DIR becomes one entry in `models`, joined
# with its training statistics (when stats_map has a matching key).
pth_files = glob.glob(os.path.join(BASE_DIR, "**/*.pth"), recursive=True)
models = []
for checkpoint in pth_files:
    rel_path = os.path.relpath(checkpoint, BASE_DIR)
    # First path component identifies the experiment folder.
    top_folder = rel_path.split(os.sep)[0]
    arch_name, dataset_name = parse_name(rel_path)
    record = stats_map.get(f"{top_folder}_{arch_name}_{dataset_name}", {})

    entry = {
        "pth": rel_path,
        "arch": arch_name,
        "dataset": dataset_name,
        "folder": top_folder,
    }
    # Numeric stats default to 0 when no record was found for this checkpoint.
    for field in ("accuracy", "energy", "time"):
        entry[field] = record.get(field, 0)
    entry["csv"] = record.get("file", "N/A")
    models.append(entry)
+# ─── README GENERATOR ────────────────────────────────────────────────────────
def build_readme(model):
    """Render the Hugging Face model card (README.md text) for one model.

    Args:
        model: Dict with the keys ``arch``, ``dataset``, ``folder``,
            ``accuracy``, ``energy`` (Joules), ``time`` (seconds) and ``csv``.

    Returns:
        The full README as a string: YAML front matter followed by the
        Markdown body (profiling environment, Green Delta table, applied
        technique, training statistics and citation).
    """
    arch     = model["arch"]
    dataset  = model["dataset"]
    folder   = model["folder"]
    acc      = model["accuracy"]
    energy   = model["energy"]
    t        = model["time"]
    phase    = folder_to_phase_label(folder)
    ds_meta  = DATASET_META.get(dataset, DATASET_META["unknown"])
    co2      = joules_to_co2(energy) if energy else 0
    baseline = baselines.get(dataset, {})
    b_acc    = baseline.get("accuracy", 0)
    b_energy = baseline.get("energy",   0)
    b_arch   = parse_name(baseline.get("file",""))[0] if baseline else "Baseline"
    # Green Delta: only computable when both energy readings are non-zero.
    eag_str = savings_str = acc_delta = "N/A"
    if b_energy and energy:
        d_acc = acc - b_acc
        d_j   = energy - b_energy
        savings_str = f"{(b_energy - energy) / b_energy * 100:.2f}%"
        acc_delta   = f"{d_acc*100:+.2f}%"
        # With identical energy (e.g. the baseline compared to itself) the
        # gradient is undefined; show "N/A" rather than a literal "nan".
        if d_j != 0:
            eag_str = f"{d_acc / d_j:.4e}"
    # YAML tags
    arch_tag = arch.lower().replace(" ","")
    # The Hub's `co2_eq_emissions.emissions` field is defined in grams of CO2,
    # so the kg estimate is converted here (the README body keeps kg).
    yaml_co2 = f"{co2 * 1000:.4f}" if co2 else "0"
    front_matter = f"""---
language: en
license: apache-2.0
tags:
- image-classification
- green-ai
- energy-efficiency
- computer-vision
- {arch_tag}
- eden-framework
- e2am
- sustainable-ai
datasets:
- {ds_meta['hf_name']}
metrics:
- accuracy
co2_eq_emissions:
  emissions: {yaml_co2}
  source: Estimated via CodeCarbon (grid factor 0.475 kg CO2e/kWh)
  hardware_used: NVIDIA GeForce GTX 1080 Ti
dataset_info:
  dataset_size: "{ds_meta['size']}"
---"""
    # Technique section
    technique = PHASE_DETAIL.get(folder, "Standard training.")
    # Green Delta Table
    green_table = f"""| Metric | {b_arch} Baseline | **{arch} (EDEN)** | Δ |
|---|---|---|---|
| Accuracy | {b_acc:.4f} | **{acc:.4f}** | `{acc_delta}` |
| Total Energy (J) | {b_energy:,.0f} | **{energy:,.0f}** | `{savings_str} saved` |
| CO₂ Emissions (kg) | {joules_to_co2(b_energy):.4f} | **{co2:.4f}** | — |
| **EAG Score** | — | **{eag_str}** | ΔAcc/ΔJoules |"""
    cite = f"""## Cite This Research
If you use this model, please cite the **EDEN / E2AM Framework**:
```bibtex
@misc{{eden2025,
  title     = {{Project EDEN: Energy-Driven Evolution of Networks}},
  author    = {{EDEN Research Team}},
  year      = {{2025}},
  note      = {{Hugging Face Organization: ProjectEDEN}},
  url       = {{https://huggingface.co/{HF_ORG}}}
}}
```"""
    readme = f"""{front_matter}
# EDEN-{arch}-{dataset} — *{phase}*
> **Primary KPI:** EAG (Energy-to-Accuracy Gradient) = `{eag_str}` ΔAcc/ΔJoules
## Abstract
This model is part of **Project EDEN (Energy-Driven Evolution of Networks)**, implementing the **E2AM (Energy Efficient Advanced Model)** Framework. The goal is to shift AI benchmarking from pure accuracy to *Green SOTA* — maximizing predictive power per Joule consumed.
**Applied Technique:** {PHASE_MAP.get(folder, phase)}
## Profiling Environment
| Component | Specification |
|---|---|
| **GPU** | {HARDWARE['gpu']} |
| **CPU** | {HARDWARE['cpu']} |
| **RAM** | {HARDWARE['ram']} |
| **OS**  | {HARDWARE['os']} |
| **Dataset** | {dataset} — {ds_meta['size']} |
## 🟢 Green Delta Table
*Comparing this model against the reference baseline (ResNet-50 equivalent)*
{green_table}
> A **positive EAG** means this model learns more per Joule than the baseline.
> A **negative EAG** indicates a trade-off where higher accuracy required more energy investment.
## E2AM Algorithm — Applied Phases
{technique}
## Training Statistics
| Metric | Value |
|---|---|
| Final Accuracy | {acc:.4f} ({acc*100:.2f}%) |
| Total Energy Consumed | {energy:,.0f} J ({energy/3_600_000:.4f} kWh) |
| Training Time | {t:,.0f} s ({t/3600:.2f} hrs) |
| Estimated CO₂ | {co2:.4f} kg CO₂e |
| Training Log | `{model['csv']}` |
{cite}
"""
    return readme
+# ─── MAIN FRAMEWORK README ───────────────────────────────────────────────────
def build_main_repo_readme():
    """Render the README for the ``EDEN-Core-Scripts`` framework repository.

    The "Scripts in This Repository" section lists every ``*.py`` file under
    ``BASE_DIR`` whose relative path contains ``Algo_``, ``eden_`` or
    ``mobilevit_model``.

    Returns:
        The full README as a string (YAML front matter + Markdown body).
    """
    keywords = ("Algo_", "eden_", "mobilevit_model")
    py_scripts = []
    for p in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True):
        rel = os.path.relpath(p, BASE_DIR)
        # Match on the path relative to BASE_DIR (as the upload step does), so
        # a parent directory name can never pull in unrelated scripts. Forward
        # slashes keep the listing identical to the paths used in the repo.
        if any(k in rel for k in keywords):
            py_scripts.append(rel.replace("\\", "/"))
    scripts_md = "\n".join(f"- `{s}`" for s in sorted(py_scripts))
    return f"""---
language: en
license: apache-2.0
tags:
- green-ai
- energy-efficiency
- e2am
- eden-framework
- sustainable-ai
- image-classification
---
# EDEN-Core-Scripts — E2AM Framework Repository
> **Project EDEN (Energy-Driven Evolution of Networks)** — The complete algorithmic
> toolkit for Green SOTA image classification research.
## Why EDEN?
As deep learning models scale exponentially, the carbon footprint of training has
reached unsustainable levels. Project EDEN introduces the **EAG
(Energy-to-Accuracy Gradient)** as the primary KPI — shifting the paradigm from
chasing raw accuracy to optimising *Green SOTA*.
## Profiling Environment
| Component | Specification |
|---|---|
| **GPU** | {HARDWARE['gpu']} |
| **CPU** | {HARDWARE['cpu']} |
| **RAM** | {HARDWARE['ram']} |
| **OS**  | {HARDWARE['os']} |
## The E2AM Algorithm — All Three Phases
### Phase 1 — Zero-Overhead Initialization
Dataset pre-loaded into **pinned System RAM** before training begins.
This eliminates disk I/O power spikes that would otherwise inflate energy readings
and distort EAG comparisons between architectures.
### Phase 2 — Two-Stage Energy-Aware Training
1. **Frozen Head Training** — Only the classification head trains for the first
   `E_unfreeze` epochs. The backbone consumes no backward-pass energy.
2. **Progressive Unfreezing** — At epoch `E_unfreeze`, all layers unlock.
   Learning rate is decayed (`LR × 0.1`) for stable fine-tuning.
3. **Gradient Accumulation** — Gradients accumulated over N micro-batches,
   simulating large batch sizes without VRAM spikes.
4. **AMP (Automatic Mixed Precision)** — `torch.cuda.amp.autocast()` halves
   bandwidth per backward pass.
5. **Sparse L1 Penalty** — `L_total = CrossEntropy + λ·Σ|W_trainable|`
6. **EAG Early-Exit** — Training terminates if `EAG < γ_EAG` for 3 consecutive
   epochs, preventing wasted compute.
### Phase 3 — Hardware-Aware Deployment *(Post-Training)*
- **Saliency-Energy Pruning** — Filters with lowest `∂Accuracy/∂W ÷ Energy_cost`
  are pruned.
- **INT8 Quantization** — Weights converted for edge-deployment readiness.
- **Dynamic Depth Routing** — Simple images bypass the middle 50 % of layers
  via residual skip connections, slashing inference energy.
## EAG — The Expert KPI
```
EAG = ΔAccuracy / ΔJoules
```
EAG allows apples-to-apples comparison of any two models regardless of
architecture family. A higher EAG = more learning per unit of carbon footprint.
## Scripts in This Repository
{scripts_md}
## Cite This Research
```bibtex
@misc{{eden2025,
  title     = {{Project EDEN: Energy-Driven Evolution of Networks}},
  author    = {{EDEN Research Team}},
  year      = {{2025}},
  note      = {{Hugging Face Organization: ProjectEDEN}},
  url       = {{https://huggingface.co/{HF_ORG}}}
}}
```
"""
# ─── OUTPUT / UPLOAD ─────────────────────────────────────────────────────────
# All generated READMEs are staged in <BASE_DIR>/hf_readmes before any upload.
OUT_DIR = os.path.join(BASE_DIR, "hf_readmes")
os.makedirs(OUT_DIR, exist_ok=True)

# 1. Main repo README
main_readme_path = os.path.join(OUT_DIR, "EDEN-Core-Scripts_README.md")
main_readme = build_main_repo_readme()
with open(main_readme_path, "w", encoding="utf-8") as f:
    f.write(main_readme)
print("✓ Main repo README written.")

# 2. Per-model READMEs (deduplicated by repo name)
generated_repos = set()
repo_model_map  = {}   # repo_name -> (model, readme_text)
for m in models:
    # Checkpoints whose architecture or dataset could not be identified get no repo.
    if "unknown" in (m["arch"], m["dataset"]):
        continue
    repo_name = f"EDEN-{m['arch']}-{m['dataset'].replace(' ','-')}"
    # Keep only the highest-accuracy model per repo; on a tie the earlier one stays.
    incumbent = repo_model_map.get(repo_name)
    if incumbent is not None and m["accuracy"] <= incumbent[0]["accuracy"]:
        continue
    repo_model_map[repo_name] = (m, build_readme(m))

for repo_name, (m, readme_text) in repo_model_map.items():
    path = os.path.join(OUT_DIR, f"{repo_name}_README.md")
    with open(path, "w", encoding="utf-8") as f:
        f.write(readme_text)
    print(f"✓ {repo_name} README written.")

print(f"\n{'='*60}")
print(f"Generated {len(repo_model_map)+1} README files in: {OUT_DIR}")
# Live upload: one Hub repo for the framework scripts plus one per model.
# Each repo is handled best-effort (an error in one does not stop the others),
# but failures are collected so the final message reflects what happened.
if not DRY_RUN:
    print("\nStarting HF upload...")
    failed_repos = []
    # Upload Main Repo README
    core_repo_id = f"{HF_ORG}/EDEN-Core-Scripts"
    try:
        create_repo(repo_id=core_repo_id, token=HF_TOKEN,
                    repo_type="model", exist_ok=True, private=False)
        upload_file(path_or_fileobj=main_readme_path,
                    path_in_repo="README.md",
                    repo_id=core_repo_id,
                    token=HF_TOKEN, repo_type="model")
        # Upload all .py scripts
        for py in glob.glob(os.path.join(BASE_DIR, "**/*.py"), recursive=True):
            rel = os.path.relpath(py, BASE_DIR)
            if any(k in rel for k in ["Algo_","eden_","mobilevit_model"]):
                upload_file(path_or_fileobj=py,
                            # Repo paths always use forward slashes.
                            path_in_repo=rel.replace("\\","/"),
                            repo_id=core_repo_id,
                            token=HF_TOKEN, repo_type="model")
        print("✓ Uploaded EDEN-Core-Scripts")
    except Exception as e:
        failed_repos.append("EDEN-Core-Scripts")
        print(f"✗ Core-Scripts error: {e}")
    # Upload per-model repos
    for repo_name, (m, readme_text) in repo_model_map.items():
        repo_id = f"{HF_ORG}/{repo_name}"
        try:
            create_repo(repo_id=repo_id, token=HF_TOKEN,
                        repo_type="model", exist_ok=True, private=False)
            readme_path = os.path.join(OUT_DIR, f"{repo_name}_README.md")
            upload_file(path_or_fileobj=readme_path,
                        path_in_repo="README.md",
                        repo_id=repo_id,
                        token=HF_TOKEN, repo_type="model")
            # Upload weights
            pth_abs = os.path.join(BASE_DIR, m["pth"])
            if os.path.exists(pth_abs):
                upload_file(path_or_fileobj=pth_abs,
                            path_in_repo=os.path.basename(m["pth"]),
                            repo_id=repo_id,
                            token=HF_TOKEN, repo_type="model")
            # Upload CSV log
            if m["csv"] != "N/A":
                csv_abs = os.path.join(BASE_DIR, m["csv"])
                if os.path.exists(csv_abs):
                    upload_file(path_or_fileobj=csv_abs,
                                path_in_repo=os.path.basename(m["csv"]),
                                repo_id=repo_id,
                                token=HF_TOKEN, repo_type="model")
                else:
                    # The README references this log, so make the gap visible.
                    print(f"  ! Training log not found, skipped: {m['csv']}")
            print(f"✓ Uploaded {repo_name}")
        except Exception as e:
            failed_repos.append(repo_name)
            print(f"✗ {repo_name} error: {e}")
    if failed_repos:
        print(f"\nUploads finished with {len(failed_repos)} failed repo(s): "
              f"{', '.join(failed_repos)}")
    else:
        print("\nAll uploads complete.")
else:
    print("\n[DRY RUN] Set DRY_RUN=False to execute HF uploads.")