Shanmuk4622 committed on
Commit
abc8030
·
verified ·
1 Parent(s): d0a541d

Upload test3/eden_UNet_ImageNet.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. test3/eden_UNet_ImageNet.py +166 -0
test3/eden_UNet_ImageNet.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import torchvision
5
+ import torchvision.transforms as transforms
6
+ from torch.utils.data import DataLoader, random_split
7
+ from sklearn.metrics import f1_score, precision_score, recall_score
8
+ from codecarbon import EmissionsTracker
9
+ from thop import profile
10
+ from tqdm import tqdm
11
+ import time, pandas as pd, numpy as np, os, warnings, copy, gc
12
+
13
# --- Configuration ---
MODEL_NAME = "unet_classifier_EDEN"
DATASET_NAME = "CustomImageNet300"
# Path to the folder containing your 300 class folders directly
DATA_PATH = r'C:\Users\shanm\Dataset Download\custom image net'
BATCH_SIZE = 64          # per-step mini-batch size
ACCUMULATION_STEPS = 8  # Effective Batch Size = 512 (64 * 8)
EPOCHS = 20              # total training epochs
E_UNFREEZE = 10          # epoch at which the frozen encoder is unfrozen for fine-tuning
LAMBDA_L1 = 1e-5         # weight of the L1 sparsity penalty on trainable parameters
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
CSV_FILENAME = f"{MODEL_NAME}_{DATASET_NAME}_stats.csv"

# Silence library warnings and CodeCarbon's per-measurement logging.
warnings.filterwarnings("ignore")
os.environ["CODECARBON_LOG_LEVEL"] = "error"
31
+
32
+ # --- "U-Net"-named Classifier (ResNet18 encoder + linear head; no decoder is built) ---
33
class UNetClassifier(nn.Module):
    """ResNet18-based image classifier.

    Despite the name there is no U-Net decoder: the model is a pretrained
    ResNet18 feature extractor (``self.encoder``) followed by global average
    pooling and a single linear classification head.

    Args:
        num_classes: size of the output logit vector (default 300).
    """

    def __init__(self, num_classes=300):
        super().__init__()
        # Encoder: a pretrained ResNet18 minus its own avgpool + fc head.
        # The backbone is deliberately NOT kept as an attribute: keeping it
        # would register every encoder layer twice in the state_dict
        # (bloating checkpoints) and would leave its unused 1000-way fc head
        # registered as trainable parameters — those params would then be
        # picked up by the optimizer and by any L1 penalty over
        # model.parameters() even though they never run in forward().
        backbone = torchvision.models.resnet18(weights='IMAGENET1K_V1')
        self.encoder = nn.Sequential(*list(backbone.children())[:-2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(512, num_classes)  # ResNet18 ends in 512 channels

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for input images x."""
        x = self.encoder(x)      # (B, 512, H/32, W/32)
        x = self.avgpool(x)      # (B, 512, 1, 1)
        x = torch.flatten(x, 1)  # (B, 512)
        x = self.classifier(x)   # (B, num_classes)
        return x
48
+
49
def main():
    """Train the classifier on the custom 300-class ImageNet subset.

    Per epoch: track energy/carbon with CodeCarbon, run AMP training with
    gradient accumulation plus an L1 sparsity penalty, append metrics to a
    CSV, and checkpoint the best-accuracy model.
    """
    # --- Phase 1: High-Resolution Initialization ---
    # Standard ImageNet preprocessing: resize -> center crop -> normalize.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    print(f"[*] Loading {DATASET_NAME} from disk (80/20 Random Split)...")
    # Class folders sit directly under DATA_PATH, so load from the root.
    full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH, transform=transform)

    # Split into 80% Train, 20% Val (the validation subset is not used below).
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, _ = random_split(
        full_dataset, [train_size, val_size],
        generator=torch.Generator().manual_seed(42)  # fixed seed => reproducible split
    )

    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

    # --- Model Setup ---
    model = UNetClassifier(num_classes=300)

    # 1. Profile a deep copy so thop's hooks never attach to the real model.
    print("[*] Calculating hardware metrics...")
    model_for_profile = copy.deepcopy(model).to(DEVICE)
    dummy_input = torch.randn(1, 3, 224, 224).to(DEVICE)
    flops, params = profile(model_for_profile, inputs=(dummy_input, ), verbose=False)
    del model_for_profile

    # 2. Freeze the encoder; only the linear head trains until E_UNFREEZE.
    for param in model.encoder.parameters():
        param.requires_grad = False

    model.to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3)
    scaler = torch.cuda.amp.GradScaler()  # auto-disables itself when CUDA is absent
    tracker = EmissionsTracker(measure_power_secs=1, save_to_file=False, log_level='error')

    results = []
    cumulative_total_energy = 0
    best_acc = 0.0
    use_cuda = DEVICE.type == 'cuda'  # CUDA-only memory APIs must be guarded on CPU runs

    print(f"\n[MODEL INFO] FLOPs: {flops/1e9:.2f} G | Parameters: {params/1e6:.2f} M | Classes: 300")
    print(f"{'='*140}")
    print(f"{'Epoch':<6} | {'Loss':<7} | {'Acc':<7} | {'Total(J)':<9} | {'VRAM(GB)':<9} | {'EAG':<8} | {'Status'}")
    print(f"{'-'*140}")

    for epoch in range(1, EPOCHS + 1):
        if epoch == E_UNFREEZE:
            # Unfreeze everything and drop the LR for gentle fine-tuning.
            for param in model.parameters(): param.requires_grad = True
            for pg in optimizer.param_groups: pg['lr'] = 1e-5
            status_msg = "UNFROZEN"
        else:
            status_msg = "FROZEN" if epoch < E_UNFREEZE else "FINE-TUNING"

        model.train()
        if use_cuda:
            # Reset the allocator high-water mark so vram_peak below measures
            # THIS epoch, not the cumulative maximum since process start.
            torch.cuda.reset_peak_memory_stats(DEVICE)
        tracker.start()
        epoch_start = time.time()
        running_loss, all_preds, all_labels = 0.0, [], []

        # Real-time progress bar
        pbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"Epoch {epoch:02d}", leave=False)

        optimizer.zero_grad()
        for i, (inputs, labels) in pbar:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                cls_loss = criterion(outputs, labels)
                # L1 sparsity penalty over the currently trainable parameters.
                l1_penalty = sum(p.abs().sum() for p in model.parameters() if p.requires_grad)
                # Divide by ACCUMULATION_STEPS so accumulated grads average correctly.
                loss = (cls_loss + LAMBDA_L1 * l1_penalty) / ACCUMULATION_STEPS

            scaler.scale(loss).backward()
            if (i + 1) % ACCUMULATION_STEPS == 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scaler.step(optimizer); scaler.update(); optimizer.zero_grad()

            running_loss += cls_loss.item()
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy()); all_labels.extend(labels.cpu().numpy())
            pbar.set_postfix({'loss': f"{cls_loss.item():.4f}"})

        # FIX: flush leftover accumulated gradients when the batch count is not
        # a multiple of ACCUMULATION_STEPS — previously the tail batches of
        # every epoch were back-propagated but never stepped, then wiped by the
        # next epoch's zero_grad().
        if len(trainloader) % ACCUMULATION_STEPS != 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer); scaler.update(); optimizer.zero_grad()

        emissions_kg = tracker.stop()
        duration = time.time() - epoch_start
        # CodeCarbon reports kWh; * 3_600_000 converts to Joules.
        e_tot = (tracker.final_emissions_data.gpu_energy + tracker.final_emissions_data.cpu_energy + tracker.final_emissions_data.ram_energy) * 3600000
        cumulative_total_energy += e_tot
        acc = (np.array(all_preds) == np.array(all_labels)).mean()
        vram_peak = torch.cuda.max_memory_allocated(DEVICE) / (1024**3) if use_cuda else 0.0
        eag = acc / (e_tot / 1000) if e_tot > 0 else 0  # accuracy per kilojoule

        # Detailed Audit Row
        stats = {
            "epoch": epoch, "status": status_msg, "loss": running_loss / len(trainloader),
            "accuracy": acc, "total_energy_j": e_tot, "cumulative_energy_j": cumulative_total_energy,
            "carbon_kg": emissions_kg, "vram_gb": vram_peak, "eag_metric": eag,
            "latency_ms": (duration / len(trainloader)) * 1000,
            "model_flops": flops, "model_params": params
        }
        results.append(stats)
        # Rewrite the full CSV every epoch so a crash loses at most one row.
        pd.DataFrame(results).to_csv(CSV_FILENAME, index=False)

        best_tag = "*" if acc > best_acc else ""
        if acc > best_acc: best_acc = acc; torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"BEST_{MODEL_NAME}_{DATASET_NAME}.pth"))
        # FIX: denominator was hard-coded to 50; EPOCHS keeps the display honest.
        print(f"{epoch:02d}/{EPOCHS} | {stats['loss']:.4f} | {acc:.2%} | {e_tot:<9.2f} | {vram_peak:<9.3f} | {eag:<8.4f} | {status_msg}{best_tag}")

    # Memory Flush for Batch Script
    del model, trainloader
    torch.cuda.empty_cache(); gc.collect()
164
+
165
# Entry-point guard — required here because DataLoader uses num_workers=4:
# worker processes re-import this module (spawn start method on Windows),
# and an unguarded main() would recurse into training.
if __name__ == '__main__':
    main()