pogzyb committed on
Commit
7c916ae
1 Parent(s): d9050ba

Upload SegformerForSemanticSegmentation

Browse files
Files changed (3) hide show
  1. config.json +106 -0
  2. model.py +111 -0
  3. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SegformerForSemanticSegmentation"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "auto_map": {
7
+ "AutoModelForImageSegmentation": "model.SegformerForSemanticSegmentation"
8
+ },
9
+ "classifier_dropout_prob": 0.1,
10
+ "decoder_hidden_size": 256,
11
+ "depths": [
12
+ 2,
13
+ 2,
14
+ 2,
15
+ 2
16
+ ],
17
+ "drop_path_rate": 0.1,
18
+ "hidden_act": "gelu",
19
+ "hidden_dropout_prob": 0.0,
20
+ "hidden_sizes": [
21
+ 32,
22
+ 64,
23
+ 160,
24
+ 256
25
+ ],
26
+ "id2label": {
27
+ "0": "skin",
28
+ "1": "l_brow",
29
+ "2": "r_brow",
30
+ "3": "l_eye",
31
+ "4": "r_eye",
32
+ "5": "eye_g",
33
+ "6": "l_ear",
34
+ "7": "r_ear",
35
+ "8": "ear_r",
36
+ "9": "nose",
37
+ "10": "mouth",
38
+ "11": "u_lip",
39
+ "12": "l_lip",
40
+ "13": "neck",
41
+ "14": "neck_l",
42
+ "15": "cloth",
43
+ "16": "hair",
44
+ "17": "hat"
45
+ },
46
+ "initializer_range": 0.02,
47
+ "label2id": {
48
+ "cloth": 15,
49
+ "ear_r": 8,
50
+ "eye_g": 5,
51
+ "hair": 16,
52
+ "hat": 17,
53
+ "l_brow": 1,
54
+ "l_ear": 6,
55
+ "l_eye": 3,
56
+ "l_lip": 12,
57
+ "mouth": 10,
58
+ "neck": 13,
59
+ "neck_l": 14,
60
+ "nose": 9,
61
+ "r_brow": 2,
62
+ "r_ear": 7,
63
+ "r_eye": 4,
64
+ "skin": 0,
65
+ "u_lip": 11
66
+ },
67
+ "layer_norm_eps": 1e-06,
68
+ "mlp_ratios": [
69
+ 4,
70
+ 4,
71
+ 4,
72
+ 4
73
+ ],
74
+ "model_type": "segformer",
75
+ "num_attention_heads": [
76
+ 1,
77
+ 2,
78
+ 5,
79
+ 8
80
+ ],
81
+ "num_channels": 3,
82
+ "num_classes": 18,
83
+ "num_encoder_blocks": 4,
84
+ "patch_sizes": [
85
+ 7,
86
+ 3,
87
+ 3,
88
+ 3
89
+ ],
90
+ "reshape_last_stage": true,
91
+ "semantic_loss_ignore_index": 255,
92
+ "sr_ratios": [
93
+ 8,
94
+ 4,
95
+ 2,
96
+ 1
97
+ ],
98
+ "strides": [
99
+ 4,
100
+ 2,
101
+ 2,
102
+ 2
103
+ ],
104
+ "torch_dtype": "float32",
105
+ "transformers_version": "4.36.2"
106
+ }
model.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import transformers
3
+ from torch import nn
4
+ from transformers.modeling_outputs import SemanticSegmenterOutput
5
+
6
+
7
def encode_down(c_in: int, c_out: int):
    """Build a double 3x3 conv block: (Conv2d -> BatchNorm2d -> ReLU) x 2.

    The first conv maps ``c_in`` -> ``c_out`` channels; the second keeps
    ``c_out``. Spatial size is preserved (kernel 3, padding 1).
    """
    layers = []
    for src, dst in ((c_in, c_out), (c_out, c_out)):
        layers.extend(
            [
                nn.Conv2d(src, dst, kernel_size=3, padding=1),
                nn.BatchNorm2d(dst),
                nn.ReLU(inplace=True),
            ]
        )
    return nn.Sequential(*layers)
16
+
17
+
18
def decode_up(c: int):
    """Build a 2x2 stride-2 transposed conv that halves the channel count.

    Doubles the spatial resolution, mapping ``c`` -> ``c // 2`` channels
    (channel counts here are positive, so ``//`` matches the original
    ``int(c / 2)`` truncation exactly).
    """
    return nn.ConvTranspose2d(c, c // 2, 2, 2)
25
+
26
+
27
class FaceUNet(nn.Module):
    """U-Net style encoder/decoder for face-parsing segmentation.

    Five encoder stages (64 -> 1024 channels) separated by 2x2 max-pool
    downsampling, four decoder stages that upsample with transposed convs
    and fuse skip connections, then a final 3x3 conv emitting one logit
    map per class at the input resolution.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        self.num_classes = num_classes

        # Encoder. The first stage is a single conv (not a double block)
        # so its 64-channel output can serve as the shallowest skip.
        self.down_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.down_2 = encode_down(64, 128)
        self.down_3 = encode_down(128, 256)
        self.down_4 = encode_down(256, 512)
        self.down_5 = encode_down(512, 1024)

        # Shared 2x downsampling used between encoder stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder: each up_i halves channels and doubles spatial size;
        # each up_ci fuses the concatenated skip + upsampled features.
        self.up_1 = decode_up(1024)
        self.up_c1 = encode_down(1024, 512)
        self.up_2 = decode_up(512)
        self.up_c2 = encode_down(512, 256)
        self.up_3 = decode_up(256)
        self.up_c3 = encode_down(256, 128)
        self.up_4 = decode_up(128)
        self.up_c4 = encode_down(128, 64)

        # 1 logit map per class, same spatial size as the input.
        self.segment = nn.Conv2d(
            in_channels=64,
            out_channels=self.num_classes,
            kernel_size=3,
            padding=1,
        )

    def forward(self, x):
        """Map (N, 3, H, W) images to (N, num_classes, H, W) logits.

        NOTE(review): H and W must be divisible by 16 so the four
        pool/upsample round trips restore the original size — confirm
        against the caller's preprocessing.
        """
        # Encoder path; keep each pre-pool activation as a skip connection.
        skip1 = self.down_1(x)
        skip2 = self.down_2(self.pool(skip1))
        skip3 = self.down_3(self.pool(skip2))
        skip4 = self.down_4(self.pool(skip3))
        bottom = self.down_5(self.pool(skip4))

        # Decoder path: upsample, concatenate the matching skip on the
        # channel axis, then fuse with a double-conv block.
        y = self.up_c1(torch.cat([skip4, self.up_1(bottom)], dim=1))
        y = self.up_c2(torch.cat([skip3, self.up_2(y)], dim=1))
        y = self.up_c3(torch.cat([skip2, self.up_3(y)], dim=1))
        y = self.up_c4(torch.cat([skip1, self.up_4(y)], dim=1))
        return self.segment(y)
83
+
84
+
85
class Segformer(transformers.PreTrainedModel):
    """Hugging Face wrapper around the custom ``FaceUNet`` backbone.

    NOTE(review): despite the name, the underlying network is a U-Net,
    not a SegFormer; ``SegformerConfig`` is reused only as the
    serialization container (it carries the custom ``num_classes`` key
    from config.json).
    """

    config_class = transformers.SegformerConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.model = FaceUNet(num_classes=config.num_classes)

    def forward(self, tensor):
        """Return raw per-class logits for a batch of images.

        Bug fix: ``FaceUNet`` defines no ``forward_features`` method, so
        the previous ``self.model.forward_features(tensor)`` always raised
        AttributeError; invoke the module directly instead.
        """
        return self.model(tensor)
95
+
96
+
97
class SegformerForSemanticSegmentation(transformers.PreTrainedModel):
    """Semantic-segmentation model wrapping ``FaceUNet`` for the HF API.

    This is the class registered under ``auto_map`` in config.json, so it
    must keep the standard ``pixel_values`` / optional ``labels`` forward
    signature and return a ``SemanticSegmenterOutput``.
    """

    config_class = transformers.SegformerConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        # `num_classes` is a custom config.json key (18 face-part labels).
        self.model = FaceUNet(num_classes=config.num_classes)

    def forward(self, pixel_values, labels=None):
        """Compute logits and, when ``labels`` is given, the CE loss.

        Bug fix: ``torch.nn.cross_entropy`` does not exist (that raised
        AttributeError whenever ``labels`` was supplied); the functional
        form lives at ``torch.nn.functional.cross_entropy``.
        """
        logits = self.model(pixel_values)
        values = {"logits": logits}
        if labels is not None:
            values["loss"] = torch.nn.functional.cross_entropy(logits, labels)
        return SemanticSegmenterOutput(**values)
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92930e2231ef4b99841c68ab826b59621934f91a27c7ed7e62c849be7a7b6d64
3
+ size 124124040