Shanmuk4622 committed on
Commit
a169a1b
·
verified ·
1 Parent(s): a13b817

Upload test3/eden_ResNet18_ImageNet.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test3/eden_ResNet18_ImageNet.py +170 -0
test3/eden_ResNet18_ImageNet.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score, precision_score, recall_score
from codecarbon import EmissionsTracker
from thop import profile
import time, pandas as pd, numpy as np, os, warnings, copy, gc
from datetime import timedelta

# --- Configuration ---
MODEL_NAME = "resnet18_EDEN"
DATASET_NAME = "CustomImageNet300"
# Path to the folder containing your 300 class folders directly
DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'
BATCH_SIZE = 128        # per-step mini-batch size
ACCUMULATION_STEPS = 4  # Effective Batch Size = 512
EPOCHS = 20             # total training epochs
E_UNFREEZE = 10         # epoch at which the frozen backbone is unfrozen for fine-tuning
LAMBDA_L1 = 1e-5        # weight of the L1 sparsity penalty added to the classification loss
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoints are written here; per-epoch stats go to CSV_FILENAME.
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# Silence library warnings and codecarbon's per-measurement log spam.
warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"
32
def main():
    """Train ResNet-18 (EDEN schedule) on a 300-class ImageNet subset.

    Pipeline: load the ImageFolder dataset, take an 80/20 seeded random
    split, fine-tune a pretrained ResNet-18 (classifier-only until epoch
    E_UNFREEZE, then the full network at a lower LR) with AMP, gradient
    accumulation, gradient clipping and an L1 sparsity penalty, while
    tracking energy/carbon via codecarbon. Per-epoch stats are appended
    to CSV_FILENAME and the best-accuracy weights are saved to SAVE_DIR.

    No parameters; no return value. Requires DATA_PATH to exist and,
    in practice, a CUDA device (VRAM stats use torch.cuda counters).
    """
    # --- Phase 1: High-Resolution Initialization ---
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"[*] Loading {DATASET_NAME} from disk (80/20 Random Split)...")
    # Load from root since your folders are flat
    full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=transform)

    # Split into 80% Train, 20% Val (fixed seed so the split is reproducible)
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, _ = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    # DataLoader with 4 workers to keep the 1080 Ti busy
    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

    # --- Model Setup ---
    model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
    model.fc = nn.Linear(model.fc.in_features, 300)  # Match your 300 classes

    # 1. Profile on clone to avoid thop/pytorch hook conflicts
    print("[*] Calculating hardware metrics (FLOPs/Params)...")
    model_for_profile = copy.deepcopy(model).to(DEVICE)
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model_for_profile, inputs=(dummy_input, ), verbose=False)
    del model_for_profile

    # 2. Initially freeze backbone for EDEN Phase 2 (only fc trains at first)
    for name, param in model.named_parameters():
        if "fc" not in name:
            param.requires_grad = False

    model.to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler()

    results = []
    cumulative_total_energy = 0
    best_acc = 0.0

    tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M | Classes: 300")
    print(f"{'='*140}")
    print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
    print(f"{'-'*140}")

    for epoch in range(1, EPOCHS + 1):
        if epoch == E_UNFREEZE:
            # EDEN transition: unfreeze the whole backbone and drop the LR.
            for param in model.parameters():
                param.requires_grad = True
            for pg in optimizer.param_groups:
                pg['lr'] = 1e-5
            status_msg = "UNFROZEN"
        else:
            status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"

        model.train()
        tracker.start()
        epoch_start_time = time.time()
        running_loss, all_preds, all_labels = 0.0, [], []

        # FIX: reset the CUDA peak-memory counter so the per-epoch
        # "VRAM(GB)" column reports this epoch's peak, not a cumulative one.
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats(DEVICE)

        optimizer.zero_grad()
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                cls_loss = criterion(outputs, labels)
                # L1 penalty over trainable params encourages sparsity (EDEN).
                l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
                # Scale by accumulation steps so gradients average correctly.
                loss = (cls_loss + LAMBDA_L1 * l1_penalty) / ACCUMULATION_STEPS

            scaler.scale(loss).backward()

            if (i + 1) % ACCUMULATION_STEPS == 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            running_loss += cls_loss.item()
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        # FIX: flush the tail of a partial accumulation window. Previously,
        # when len(trainloader) was not a multiple of ACCUMULATION_STEPS the
        # last few batches' gradients were silently dropped every epoch.
        if len(trainloader) % ACCUMULATION_STEPS != 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        emissions_kg = tracker.stop()
        duration = time.time() - epoch_start_time

        # Energy Metrics (codecarbon reports kWh; convert to Joules)
        e_gpu = tracker.final_emissions_data.gpu_energy * 3600000
        e_cpu = tracker.final_emissions_data.cpu_energy * 3600000
        e_ram = tracker.final_emissions_data.ram_energy * 3600000
        total_energy = e_gpu + e_cpu + e_ram
        cumulative_total_energy += total_energy

        acc = (np.array(all_preds) == np.array(all_labels)).mean()
        f1 = f1_score(all_labels, all_preds, average='macro')
        vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3)
        # EAG = accuracy per kJ of energy spent this epoch.
        eag = acc / (total_energy / 1000) if total_energy > 0 else 0

        # Full Audit Row for .csv
        epoch_stats = {
            "epoch": epoch, "status": status_msg, "loss": running_loss / len(trainloader),
            "accuracy": acc, "f1_score": f1,
            "precision": precision_score(all_labels, all_preds, average='macro', zero_division=0),
            "recall": recall_score(all_labels, all_preds, average='macro', zero_division=0),
            "energy_gpu_j": e_gpu, "energy_cpu_j": e_cpu, "energy_ram_j": e_ram,
            "total_energy_j": total_energy, "cumulative_total_energy_j": cumulative_total_energy,
            "carbon_kg": emissions_kg, "vram_gb": vram_peak,
            "latency_ms": (duration / len(trainloader)) * 1000,
            "eag_metric": eag, "model_flops": flops, "model_params": params
        }
        results.append(epoch_stats)
        # Rewrite the CSV every epoch so a crash never loses completed rows.
        pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

        best_tag = "*" if acc > best_acc else ""
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))

        # FIX: progress line previously hard-coded "/50" although EPOCHS = 20.
        print(f"{epoch:02d}/{EPOCHS} | {epoch_stats['loss']:.4f} | {acc:.2%} | {total_energy:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")

    # Explicit memory cleanup for overnight batch safety
    del model, trainloader
    torch.cuda.empty_cache()
    gc.collect()

    print(f"{'='*140}\n[FINISH] ResNet-18 on ImageNet300 complete.")

if __name__ == '__main__':
    main()