Shanmuk4622 committed on
Commit
aac87ab
·
verified ·
1 Parent(s): c2aaa6b

Upload test3/eden_AlexNet_CIFAR10.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test3/eden_AlexNet_CIFAR10.py +189 -0
test3/eden_AlexNet_CIFAR10.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import torchvision
5
+ import torchvision.transforms as transforms
6
+ from torch.utils.data import DataLoader, TensorDataset
7
+ from sklearn.metrics import f1_score, precision_score, recall_score
8
+ from codecarbon import EmissionsTracker
9
+ from thop import profile
10
+ import time
11
+ import pandas as pd
12
+ import numpy as np
13
+ import os
14
+ import warnings
15
+ from datetime import timedelta
16
+
17
# --- Configuration ---
# Identifiers used to build the checkpoint and CSV file names.
MODEL_NAME = "alexnet_EDEN"
DATASET_NAME = "CIFAR10"

# Root directory of the already-downloaded CIFAR-10 data (the loader is called
# with download=False). The original machine-specific path stays the default;
# set the EDEN_DATA_PATH environment variable to run on another machine
# without editing this file.
DATA_PATH = os.environ.get("EDEN_DATA_PATH", r'C:\Users\shanm\Dataset Download\CIFAR10')

BATCH_SIZE = 128
ACCUMULATION_STEPS = 4  # Simulates a larger batch size of 512 for energy stability
EPOCHS = 15
E_UNFREEZE = 10   # When to unfreeze the backbone (EDEN Phase 2)
LAMBDA_L1 = 1e-5  # Sparsity penalty (EDEN Phase 2)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Output locations: best checkpoint directory and per-epoch statistics CSV.
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# Silence Python warnings and keep codecarbon's logging at error level so the
# per-epoch table printed by main() stays readable.
warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"
34
+
35
def _build_cached_trainloader(pin_memory):
    """Load the CIFAR-10 training split, pre-transform it and cache it in RAM.

    Every image is resized to 224x224, converted to a tensor and normalized
    once, then stacked into a single TensorDataset so that the training loop
    performs no per-batch disk I/O or transform work.

    NOTE(review): the cached float32 tensor is 3x224x224 per image; for the
    50,000-image CIFAR-10 training split that is roughly 30 GB of RAM (more
    at peak while the list and the stacked copy coexist) -- confirm the host
    has enough memory.

    Args:
        pin_memory: Whether the DataLoader should use pinned host memory.

    Returns:
        A shuffling DataLoader over the cached tensors with BATCH_SIZE batches.
    """
    transform = transforms.Compose([
        transforms.Resize(224),  # AlexNet pre-trained expects 224x224
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    print(f"[*] Caching {DATASET_NAME} to System RAM for zero-I/O overhead...")
    full_dataset = torchvision.datasets.CIFAR10(
        root=DATA_PATH, train=True, download=False, transform=transform
    )

    # Materialize every transformed sample once.
    all_data = []
    all_targets = []
    for img, target in full_dataset:
        all_data.append(img)
        all_targets.append(target)

    cached_trainset = TensorDataset(torch.stack(all_data), torch.tensor(all_targets))
    return DataLoader(
        cached_trainset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=pin_memory
    )


def _build_model():
    """Create an ImageNet-pretrained AlexNet with a 10-class head, backbone frozen.

    Returns:
        The model moved to DEVICE, with `model.features` parameters frozen.
    """
    # Using IMAGENET1K_V1 as per EDEN Algorithm Phase 1
    model = torchvision.models.alexnet(weights='IMAGENET1K_V1')
    model.classifier[6] = nn.Linear(4096, 10)  # 10 classes for CIFAR-10

    # Initially freeze backbone
    for param in model.features.parameters():
        param.requires_grad = False

    return model.to(DEVICE)


def main():
    """Train AlexNet on CIFAR-10 with the EDEN schedule and log energy metrics.

    Phase 1 trains only the classifier on a frozen backbone; from epoch
    E_UNFREEZE on, the backbone is unfrozen and the learning rate lowered.
    Each epoch records loss, accuracy/F1/precision/recall on the training
    data, codecarbon energy figures, peak VRAM and gradient norm to
    CSV_FILENAME, and saves the best-accuracy weights under SAVE_DIR.
    """
    # AMP and CUDA memory statistics are only meaningful (and only safe to
    # query) when running on a CUDA device.
    use_cuda = DEVICE.type == "cuda"

    # --- Phase 1: Zero-Overhead Initialization (RAM Caching) ---
    trainloader = _build_cached_trainloader(pin_memory=use_cuda)
    num_batches = len(trainloader)

    # --- Model Setup (Transfer Learning) ---
    model = _build_model()

    # Calculate FLOPs & Parameters
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model, inputs=(dummy_input, ), verbose=False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    # Disabled scaler is a transparent pass-through, so the same code path
    # works on CPU.
    scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)  # For Automated Mixed Precision (AMP)

    results = []
    cumulative_total_energy = 0
    best_acc = 0.0

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M | Batch Size: {BATCH_SIZE}")
    print(f"{'='*140}")
    print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
    print(f"{'-'*140}")

    for epoch in range(1, EPOCHS + 1):
        # --- Phase 2: Progressive Unfreezing ---
        if epoch == E_UNFREEZE:
            for param in model.features.parameters():
                param.requires_grad = True
            # Reduce LR for fine-tuning
            for param_group in optimizer.param_groups:
                param_group['lr'] = 1e-5
            status_msg = "UNFROZEN"
        else:
            status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"

        model.train()

        # A fresh tracker per epoch keeps the reported energy strictly
        # per-epoch. NOTE(review): on codecarbon releases where start() on an
        # already-started tracker is a no-op, reusing one tracker would report
        # cumulative totals instead -- confirm against the installed version.
        tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')
        tracker.start()
        epoch_start_time = time.time()
        running_loss, all_preds, all_labels, grad_norms = 0.0, [], [], []

        optimizer.zero_grad()
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # Automated Mixed Precision Forward Pass
            with torch.cuda.amp.autocast(enabled=use_cuda):
                outputs = model(inputs)
                cls_loss = criterion(outputs, labels)

            # Sparse Training Penalty (L1) over the currently trainable weights
            l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
            loss = (cls_loss + LAMBDA_L1 * l1_penalty) / ACCUMULATION_STEPS

            scaler.scale(loss).backward()

            # Gradient Accumulation: step every ACCUMULATION_STEPS batches and
            # also on the final batch, so gradients from a trailing partial
            # window are applied instead of being discarded by the next
            # epoch's zero_grad().
            is_last_batch = (i + 1) == num_batches
            if (i + 1) % ACCUMULATION_STEPS == 0 or is_last_batch:
                # Unscale first so clipping operates on true gradient values.
                scaler.unscale_(optimizer)
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                grad_norms.append(grad_norm.item())

                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            running_loss += cls_loss.item()
            predicted = outputs.argmax(dim=1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        emissions_kg = tracker.stop()
        duration = time.time() - epoch_start_time

        # Energy Metrics (kWh to Joules)
        e_gpu = tracker.final_emissions_data.gpu_energy * 3600000
        e_cpu = tracker.final_emissions_data.cpu_energy * 3600000
        e_ram = tracker.final_emissions_data.ram_energy * 3600000
        total_energy = e_gpu + e_cpu + e_ram
        cumulative_total_energy += total_energy

        acc = (np.array(all_preds) == np.array(all_labels)).mean()
        f1 = f1_score(all_labels, all_preds, average='macro')
        # max_memory_allocated raises without CUDA; report 0 on CPU runs.
        vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3) if use_cuda else 0.0
        # Accuracy per kilojoule consumed in this epoch.
        eag = acc / (total_energy / 1000) if total_energy > 0 else 0

        # CSV Logging
        epoch_stats = {
            "epoch": epoch,
            "status": status_msg,
            "loss": running_loss / num_batches,
            "accuracy": acc,
            "f1_score": f1,
            "precision": precision_score(all_labels, all_preds, average='macro', zero_division=0),
            "recall": recall_score(all_labels, all_preds, average='macro', zero_division=0),
            "energy_gpu_j": e_gpu,
            "energy_cpu_j": e_cpu,
            "energy_ram_j": e_ram,
            "total_energy_j": total_energy,
            "cumulative_energy_j": cumulative_total_energy,
            "carbon_kg": emissions_kg,
            "vram_gb": vram_peak,
            "latency_ms": (duration / num_batches) * 1000,
            "eag_metric": eag,
            "grad_norm": np.mean(grad_norms) if grad_norms else 0,
            "model_flops": flops,
            "model_params": params,
            "batch_size": BATCH_SIZE,
            "accumulation_steps": ACCUMULATION_STEPS,
            "effective_batch_size": BATCH_SIZE * ACCUMULATION_STEPS,
        }
        results.append(epoch_stats)
        # Rewrite the CSV every epoch so partial results survive an interruption.
        pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}.pth"))
            best_tag = "*"
        else:
            best_tag = ""

        # Show progress against the configured epoch count.
        print(f"{epoch:02d}/{EPOCHS} | {epoch_stats['loss']:.4f} | {acc:.2%} | {total_energy:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")

    print(f"{'='*140}\n[FINISH] Results saved to {CSV_FILENAME}")


if __name__ == '__main__':
    main()