Shanmuk4622 committed
Commit ddb3c40 · verified · 1 Parent(s): 5ff8c0d

Upload test3/eden_AlexNet_ImageNet.py with huggingface_hub

Files changed (1)
  1. test3/eden_AlexNet_ImageNet.py +188 -0
test3/eden_AlexNet_ImageNet.py ADDED
@@ -0,0 +1,188 @@
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ import torchvision
+ import torchvision.transforms as transforms
+ from torch.utils.data import DataLoader, random_split
+ from sklearn.metrics import f1_score, precision_score, recall_score
+ from codecarbon import EmissionsTracker
+ from thop import profile
+ import time
+ import pandas as pd
+ import numpy as np
+ import os
+ import warnings
+ import copy
+ from datetime import timedelta
+
+ # --- Configuration ---
+ MODEL_NAME = "alexnet_EDEN"
+ DATASET_NAME = "CustomImageNet300"
+ # Path to the folder containing your 300 class folders
+ DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'
+ BATCH_SIZE = 128
+ ACCUMULATION_STEPS = 4  # Effective Batch Size = 512
+ EPOCHS = 15
+ E_UNFREEZE = 10
+ LAMBDA_L1 = 1e-5
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ SAVE_DIR = "saved_models"
+ os.makedirs(SAVE_DIR, exist_ok=True)
+ CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"
+
+ warnings.filterwarnings("ignore")
+ os.environ["CODECARBON_LOG_LEVEL"] = "error"
+
+ def main():
+     # --- Phase 1: High-Resolution Initialization ---
+     transform = transforms.Compose([
+         transforms.Resize(256),
+         transforms.CenterCrop(224),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+     ])
+
+     print(f"[*] Loading {DATASET_NAME} from disk (80/20 Random Split)...")
+
+     # Load the root folder directly since the class folders sit there
+     full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=transform)
+
+     # Calculate split sizes
+     train_size = int(0.8 * len(full_dataset))
+     val_size = len(full_dataset) - train_size
+
+     # Split the dataset
+     train_dataset, val_dataset = random_split(
+         full_dataset, [train_size, val_size],
+         generator=torch.Generator().manual_seed(42)  # Consistent split across runs
+     )
+
+     trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
+     # Note: the metrics below are tracked on the training set for the audit; a valloader can be added later for validation (see the sketch just below)
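+     # Hypothetical sketch of that valloader, mirroring trainloader minus the
+     # shuffling (names and arguments are assumptions, not part of the tracked run):
+     #   valloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
+     #                          num_workers=4, pin_memory=True)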
+
+     print(f"[*] Found {len(full_dataset)} images across {len(full_dataset.classes)} classes.")
+
+     # --- Model Setup (EDEN Phase 1) ---
+     model = torchvision.models.alexnet(weights='IMAGENET1K_V1')
+     model.classifier[6] = nn.Linear(4096, 300)  # Match your 300 classes
+
+     # Static Profiling on Clone (The 'total_ops' fix)
+     print("[*] Profiling hardware requirements...")
+     model_for_profile = copy.deepcopy(model).to(DEVICE)
+     dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
+     flops, params = profile(model_for_profile, inputs=(dummy_input,), verbose=False)
+     del model_for_profile
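+     # Why the clone: thop's profile() registers extra 'total_ops'/'total_params'
+     # buffers on every module it visits; profiling a deep copy keeps those
+     # buffers out of the model that is trained and checkpointed below.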
+
+     # Freeze backbone
+     for param in model.features.parameters():
+         param.requires_grad = False
+
+     model.to(DEVICE)
+
+     criterion = nn.CrossEntropyLoss()
+     optimizer = optim.AdamW(model.parameters(), lr=1e-3)
+     scaler = torch.cuda.amp.GradScaler()
+
+     results = []
+     cumulative_total_energy = 0
+     total_start_time = time.time()
+     best_acc = 0.0
+
+     tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')
+
+     print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M")
+     print(f"{'='*140}")
+     print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
+     print(f"{'-'*140}")
+
+     for epoch in range(1, EPOCHS + 1):
+         if epoch == E_UNFREEZE:
+             for param in model.parameters():
+                 param.requires_grad = True
+             for param_group in optimizer.param_groups:
+                 param_group['lr'] = 1e-5
+             status_msg = "UNFROZEN"
+         else:
+             status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"
+
+         model.train()
+         tracker.start()
+         epoch_start_time = time.time()
+         running_loss, all_preds, all_labels, grad_norms = 0.0, [], [], []
+
+         optimizer.zero_grad()
+         for i, (inputs, labels) in enumerate(trainloader):
+             inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+
+             with torch.cuda.amp.autocast():
+                 outputs = model(inputs)
+                 cls_loss = criterion(outputs, labels)
+                 l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
+                 loss = (cls_loss + LAMBDA_L1 * l1_penalty) / ACCUMULATION_STEPS
+
+             scaler.scale(loss).backward()
+
+             if (i + 1) % ACCUMULATION_STEPS == 0:
+                 scaler.unscale_(optimizer)
+                 grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+                 grad_norms.append(grad_norm.item())
+                 scaler.step(optimizer)
+                 scaler.update()
+                 optimizer.zero_grad()
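+             # Note: when len(trainloader) is not a multiple of ACCUMULATION_STEPS,
+             # gradients from the final partial window never reach scaler.step();
+             # they are cleared by optimizer.zero_grad() at the next epoch. A guarded
+             # extra step on the last batch would flush them (sketch only, not in this file).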
+
+             running_loss += cls_loss.item()
+             _, predicted = torch.max(outputs.data, 1)
+             all_preds.extend(predicted.cpu().numpy())
+             all_labels.extend(labels.cpu().numpy())
+
+         emissions_kg = tracker.stop()
+         duration = time.time() - epoch_start_time
+
+         # Energy metrics (codecarbon reports kWh; 1 kWh = 3.6e6 J)
+         e_gpu = tracker.final_emissions_data.gpu_energy * 3600000
+         e_cpu = tracker.final_emissions_data.cpu_energy * 3600000
+         e_ram = tracker.final_emissions_data.ram_energy * 3600000
+         total_energy = e_gpu + e_cpu + e_ram
+         cumulative_total_energy += total_energy
+
+         acc = (np.array(all_preds) == np.array(all_labels)).mean()
+         f1 = f1_score(all_labels, all_preds, average='macro')
+         vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3)
+         eag = acc / (total_energy / 1000) if total_energy > 0 else 0
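+         # EAG reads as accuracy per kilojoule (total_energy is in joules).
+         # Caveat: max_memory_allocated() reports the peak since the start of the
+         # run (or the last reset), so vram_peak is cumulative across epochs;
+         # calling torch.cuda.reset_peak_memory_stats(DEVICE) at the top of each
+         # epoch would make it per-epoch (sketch only, not in this file).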
+
+         # Detailed Audit Row
+         epoch_stats = {
+             "epoch": epoch, "status": status_msg, "loss": running_loss / len(trainloader),
+             "accuracy": acc, "f1_score": f1,
+             "precision": precision_score(all_labels, all_preds, average='macro', zero_division=0),
+             "recall": recall_score(all_labels, all_preds, average='macro', zero_division=0),
+             "energy_gpu_j": e_gpu, "energy_cpu_j": e_cpu, "energy_ram_j": e_ram,
+             "total_energy_j": total_energy, "cumulative_total_energy_j": cumulative_total_energy,
+             "carbon_kg": emissions_kg, "vram_gb": vram_peak,
+             "latency_ms": (duration / len(trainloader)) * 1000,
+             "eag_metric": eag, "grad_norm": np.mean(grad_norms) if grad_norms else 0,
+             "model_flops": flops, "model_params": params,
+             "batch_size": BATCH_SIZE, "accumulation_steps": ACCUMULATION_STEPS
+         }
+         results.append(epoch_stats)
+         pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)
+
+         if acc > best_acc:
+             best_acc = acc
+             torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))
+             best_tag = "*"
+         else:
+             best_tag = ""
+
+         print(f"{epoch:02d}/{EPOCHS} | {epoch_stats['loss']:.4f} | {acc:.2%} | {total_energy:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")
+
+     # Memory cleanup before exit
+     del model, trainloader
+     torch.cuda.empty_cache()
+     import gc; gc.collect()
+
+     print(f"{'='*140}\n[FINISH] AlexNet on ImageNet300 saved to {CSV_FILENAME}")
+
+ if __name__ == '__main__':
+     main()
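
To run the uploaded script, the imports at the top imply torch, torchvision, scikit-learn, codecarbon, thop, and pandas as dependencies, and DATA_PATH must point at the folder holding the 300 class subfolders (assumptions about the local setup, not anything the commit verifies):

    python test3/eden_AlexNet_ImageNet.py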