mykeysid10 committed
Commit 9d8d919 · 1 Parent(s): 717b608

Upload cloud_coverage_pipeline.py

Files changed (1):
  1. cloud_coverage_pipeline.py +164 -0
cloud_coverage_pipeline.py ADDED
@@ -0,0 +1,164 @@
+ # Importing Libraries
+ import os
+ # Hide CUDA devices (set before importing torch so it reliably takes effect)
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+
+ import pickle
+ import cv2
+ import numpy as np
+ import timm
+ import torch
+ from torch import nn
+ from torch.utils.data import Dataset, DataLoader
+ from transformers import DistilBertModel, DistilBertConfig
+ from tqdm.autonotebook import tqdm
+
+
+ # Trained Model Configurations
+ CFG = {
+     "debug": False,
+     "captions_path": ".",
+     "batch_size": 64,
+     "num_workers": 4,
+     # Optimizer / scheduler settings (used during training, not at inference)
+     "head_lr": 1e-3,
+     "image_encoder_lr": 1e-4,
+     "text_encoder_lr": 1e-5,
+     "weight_decay": 1e-3,
+     "patience": 1,
+     "factor": 0.8,
+     "epochs": 12,
+     "device": "cpu",
+     # Image encoder: ResNet-50 backbone producing 2048-dim features
+     "model_name": 'resnet50',
+     "image_embedding": 2048,
+     # Text encoder: DistilBERT producing 768-dim features
+     "text_encoder_model": "distilbert-base-uncased",
+     "text_embedding": 768,
+     "text_tokenizer": "distilbert-base-uncased",
+     "max_length": 200,
+     "pretrained": True,
+     "trainable": True,
+     "temperature": 1.0,
+     "size": 224,
+     # Projection head
+     "num_projection_layers": 1,
+     "projection_dim": 256,
+     "dropout": 0.1
+ }
+
+
+ # The finetuned CLIP model's weights are loaded into the class structure below
+ class CLIPModel(nn.Module):
+     def __init__(
+         self,
+         temperature=CFG["temperature"],
+         image_embedding=CFG["image_embedding"],
+         text_embedding=CFG["text_embedding"],
+     ):
+         super().__init__()
+         self.image_encoder = ImageEncoder()
+         self.text_encoder = TextEncoder()
+         self.image_projection = ProjectionHead(embedding_dim=image_embedding)
+         self.text_projection = ProjectionHead(embedding_dim=text_embedding)
+         self.temperature = temperature
+
+
+ # Image Encoder: extracts features with the finetuned CLIP's ResNet image encoder
+ class ImageEncoder(nn.Module):
+     def __init__(self, model_name=CFG["model_name"], pretrained=CFG["pretrained"], trainable=CFG["trainable"]):
+         super().__init__()
+         # num_classes=0 with global_pool="avg" makes timm return pooled 2048-dim features
+         self.model = timm.create_model(model_name, pretrained=pretrained, num_classes=0, global_pool="avg")
+         for p in self.model.parameters():
+             p.requires_grad = trainable
+
+     def forward(self, x):
+         return self.model(x)
+
+
+ # Text Encoder - optional at inference (only image features feed the regressor)
+ class TextEncoder(nn.Module):
+     def __init__(self, model_name=CFG["text_encoder_model"], pretrained=CFG["pretrained"],
+                  trainable=CFG["trainable"]):
+         super().__init__()
+         if pretrained:
+             self.model = DistilBertModel.from_pretrained(model_name)
+         else:
+             self.model = DistilBertModel(config=DistilBertConfig())
+
+         for p in self.model.parameters():
+             p.requires_grad = trainable
+
+         # Use the first ([CLS]) token's hidden state as the sentence embedding
+         self.target_token_idx = 0
+
+     def forward(self, input_ids, attention_mask):
+         output = self.model(input_ids=input_ids, attention_mask=attention_mask)
+         last_hidden_state = output.last_hidden_state
+         return last_hidden_state[:, self.target_token_idx, :]
+
+
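+ # Hypothetical example (not part of the original pipeline): once the models below
+ # are loaded, text features could be extracted with the matching tokenizer:
+ #   from transformers import DistilBertTokenizer
+ #   tokenizer = DistilBertTokenizer.from_pretrained(CFG["text_tokenizer"])
+ #   batch = tokenizer(["partly cloudy sky"], padding=True, truncation=True,
+ #                     max_length=CFG["max_length"], return_tensors="pt")
+ #   text_features = clip_model.text_encoder(batch["input_ids"], batch["attention_mask"])  # shape [1, 768]
+
+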
+ # Projection Head - optional at inference: maps encoder features into the shared embedding space
+ class ProjectionHead(nn.Module):
+     def __init__(
+         self,
+         embedding_dim,
+         projection_dim=CFG["projection_dim"],
+         dropout=CFG["dropout"]
+     ):
+         super().__init__()
+         self.projection = nn.Linear(embedding_dim, projection_dim)
+         self.gelu = nn.GELU()
+         self.fc = nn.Linear(projection_dim, projection_dim)
+         self.dropout = nn.Dropout(dropout)
+         self.layer_norm = nn.LayerNorm(projection_dim)
+
+     def forward(self, x):
+         # Linear projection, then a GELU MLP with a residual connection and LayerNorm
+         projected = self.projection(x)
+         x = self.gelu(projected)
+         x = self.fc(x)
+         x = self.dropout(x)
+         x = x + projected
+         x = self.layer_norm(x)
+         return x
+
+
+ # Dataset class: transforms a raw image (plus a dummy label) into model-ready arrays
+ class SkyImage(Dataset):
+     def __init__(self, img, label):
+         self.img = img
+         self.img_label = label
+
+     def __len__(self):
+         return len(self.img)
+
+     def __getitem__(self, idx):
+         # Resize to 244x244 (note: CFG["size"] is 224; 244 is kept from the original pipeline).
+         # The original code left a BGR->RGB conversion commented out, so images stay in OpenCV's BGR order.
+         image = cv2.resize(self.img[idx], (244, 244))
+         # HWC -> CHW, the channel order PyTorch expects
+         image = np.moveaxis(image, -1, 0)
+         label = self.img_label[idx]
+         return image, label
+
+
+ # Extracts image features from the finetuned CLIP model
+ def get_features(clip_model, dataset):
+     features, label = [], []
+     with torch.no_grad():
+         for images, labels in tqdm(DataLoader(dataset, batch_size=CFG["batch_size"])):
+             image_input = images.float().cpu()
+             image_features = clip_model.image_encoder(image_input)
+             features.append(image_features)
+             label.append(labels)
+     return torch.cat(features), torch.cat(label).cpu()
+
+
+ # Loading CLIP and CatBoost models
+ with open("catboost_model.sav", "rb") as f:
+     CTBR_model = pickle.load(f)
+ clip_model = CLIPModel().to(CFG["device"])
+ clip_model.load_state_dict(torch.load("clip_model.pt", map_location=CFG["device"]))
+ clip_model.eval()
+
+
+ # Predicts cloud coverage (%) for a single sky image
+ def predict_cloud_coverage(image):
+     img, lbl = [image], [0]  # dummy label to satisfy the Dataset interface
+     # Transforming data into the custom Dataset format
+     test_image = SkyImage(img, lbl)
+     # Extracting features from the finetuned CLIP model
+     features, label = get_features(clip_model, test_image)
+     # Predicting cloud coverage from the features, clamped to [0, 100] and rounded
+     pred_cloud_coverage = CTBR_model.predict(features.cpu().numpy())
+     return round(max(0.0, min(100.0, pred_cloud_coverage[0])), 1)
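
For reference, calling this pipeline end to end might look like the minimal sketch below. The image path is a placeholder, and the example assumes catboost_model.sav and clip_model.pt are present in the working directory, since both models load at module import time.

# Hypothetical usage sketch (sample_sky.jpg is a placeholder path)
import cv2
from cloud_coverage_pipeline import predict_cloud_coverage

image = cv2.imread("sample_sky.jpg")  # OpenCV loads BGR, which the pipeline uses as-is
coverage = predict_cloud_coverage(image)
print(f"Predicted cloud coverage: {coverage}%")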