pogzyb committed
Commit 2163be8
1 Parent(s): be42be4

Upload SegformerForSemanticSegmentation

Files changed (2):
  1. config.json +3 -56
  2. model.py +63 -3
config.json CHANGED
@@ -2,27 +2,10 @@
   "architectures": [
     "SegformerForSemanticSegmentation"
   ],
-  "attention_probs_dropout_prob": 0.0,
   "auto_map": {
+    "AutoConfig": "model.FaceSegmenterConfig",
     "AutoModelForImageSegmentation": "model.SegformerForSemanticSegmentation"
   },
-  "classifier_dropout_prob": 0.1,
-  "decoder_hidden_size": 256,
-  "depths": [
-    2,
-    2,
-    2,
-    2
-  ],
-  "drop_path_rate": 0.1,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.0,
-  "hidden_sizes": [
-    32,
-    64,
-    160,
-    256
-  ],
   "id2label": {
     "0": "skin",
     "1": "l_brow",
@@ -43,7 +26,6 @@
     "16": "hair",
     "17": "hat"
   },
-  "initializer_range": 0.02,
   "label2id": {
     "cloth": 15,
     "ear_r": 8,
@@ -64,43 +46,8 @@
     "skin": 0,
     "u_lip": 11
   },
-  "layer_norm_eps": 1e-06,
-  "mlp_ratios": [
-    4,
-    4,
-    4,
-    4
-  ],
-  "model_type": "segformer",
-  "num_attention_heads": [
-    1,
-    2,
-    5,
-    8
-  ],
-  "num_channels": 3,
+  "model_type": "image-segmentation",
   "num_classes": 18,
-  "num_encoder_blocks": 4,
-  "patch_sizes": [
-    7,
-    3,
-    3,
-    3
-  ],
-  "reshape_last_stage": true,
-  "semantic_loss_ignore_index": 255,
-  "sr_ratios": [
-    8,
-    4,
-    2,
-    1
-  ],
-  "strides": [
-    4,
-    2,
-    2,
-    2
-  ],
   "torch_dtype": "float32",
-  "transformers_version": "4.36.2"
+  "transformers_version": "4.37.0"
 }
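With "AutoConfig" now mapped to model.FaceSegmenterConfig alongside the existing AutoModelForImageSegmentation entry, the custom classes in model.py can be resolved when loading from the Hub with trust_remote_code=True. A minimal sketch — the repo id below is a hypothetical placeholder, not the actual model path:

from transformers import AutoConfig, AutoModelForImageSegmentation

repo_id = "pogzyb/face-segmenter"  # hypothetical repo id; substitute the real Hub path

# trust_remote_code=True lets transformers import FaceSegmenterConfig and
# SegformerForSemanticSegmentation from this repo's model.py, per the
# "auto_map" entries above.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForImageSegmentation.from_pretrained(repo_id, trust_remote_code=True)

print(type(config).__name__)     # FaceSegmenterConfig
print(model.config.num_classes)  # 18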
model.py CHANGED
@@ -4,6 +4,65 @@ from torch import nn
 from transformers.modeling_outputs import SemanticSegmenterOutput
 
 
+class FaceSegmenterConfig(transformers.PretrainedConfig):
+    model_type = "image-segmentation"
+
+    _id2label = {
+        0: "skin",
+        1: "l_brow",
+        2: "r_brow",
+        3: "l_eye",
+        4: "r_eye",
+        5: "eye_g",
+        6: "l_ear",
+        7: "r_ear",
+        8: "ear_r",
+        9: "nose",
+        10: "mouth",
+        11: "u_lip",
+        12: "l_lip",
+        13: "neck",
+        14: "neck_l",
+        15: "cloth",
+        16: "hair",
+        17: "hat",
+    }
+
+    _label2id = {
+        "skin": 0,
+        "l_brow": 1,
+        "r_brow": 2,
+        "l_eye": 3,
+        "r_eye": 4,
+        "eye_g": 5,
+        "l_ear": 6,
+        "r_ear": 7,
+        "ear_r": 8,
+        "nose": 9,
+        "mouth": 10,
+        "u_lip": 11,
+        "l_lip": 12,
+        "neck": 13,
+        "neck_l": 14,
+        "cloth": 15,
+        "hair": 16,
+        "hat": 17,
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.id2label = kwargs.get("id2label", self._id2label)
+
+        # for some reason these get converted to strings when used in pipelines
+        id_keys = list(self.id2label.keys())
+        for label_id in id_keys:
+            label_value = self.id2label.pop(label_id)
+            self.id2label[int(label_id)] = label_value
+
+        self.label2id = kwargs.get("label2id", self._label2id)
+        self.num_classes = kwargs.get("num_classes", len(self.id2label))
+
+
 def encode_down(c_in: int, c_out: int):
     return nn.Sequential(
         nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
@@ -28,7 +87,7 @@ class FaceUNet(nn.Module):
     def __init__(self, num_classes: int):
         super().__init__()
         self.num_classes = num_classes
-
+        # unet
         self.down_1 = nn.Conv2d(
             in_channels=3,
             out_channels=64,
@@ -42,6 +101,7 @@ class FaceUNet(nn.Module):
 
         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
 
+        # Below, `in_channels` again becomes 1024 as we are concatenating.
         self.up_1 = decode_up(1024)
         self.up_c1 = encode_down(1024, 512)
         self.up_2 = decode_up(512)
@@ -83,7 +143,7 @@ class FaceUNet(nn.Module):
 
 
 class Segformer(transformers.PreTrainedModel):
-    config_class = transformers.SegformerConfig
+    config_class = FaceSegmenterConfig
 
     def __init__(self, config):
         super().__init__(config)
@@ -95,7 +155,7 @@ class Segformer(transformers.PreTrainedModel):
 
 
 class SegformerForSemanticSegmentation(transformers.PreTrainedModel):
-    config_class = transformers.SegformerConfig
+    config_class = FaceSegmenterConfig
 
     def __init__(self, config):
         super().__init__(config)
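The int() coercion in FaceSegmenterConfig.__init__ guards against a JSON round-trip quirk: JSON object keys are always strings, so once id2label has been serialized into config.json and read back (as pipelines do), its keys arrive as "0", "1", ... rather than ints, and integer class indices no longer match. A small sketch of the failure mode the loop corrects:

import json

id2label = {0: "skin", 1: "l_brow"}

# JSON stringifies integer keys on the way through...
round_tripped = json.loads(json.dumps(id2label))
print(round_tripped)  # {'0': 'skin', '1': 'l_brow'}

# ...so an integer class index would miss until the keys are coerced back:
pred_class = 0
print(pred_class in round_tripped)                                  # False
print(pred_class in {int(k): v for k, v in round_tripped.items()})  # True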
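The new comment about in_channels becoming 1024 again refers to the U-Net skip connections in FaceUNet: on the way up, each upsampled feature map is concatenated channel-wise with the matching encoder feature map, so two 512-channel tensors feed the 1024-channel encode_down block. A quick illustration with dummy tensors (the spatial sizes here are assumptions for illustration, not taken from the model):

import torch

up = torch.randn(1, 512, 64, 64)    # upsampled decoder features from decode_up(1024)
skip = torch.randn(1, 512, 64, 64)  # matching encoder features saved on the way down

# Concatenating along the channel dimension doubles the channel count,
# which is why up_c1 = encode_down(1024, 512) takes 1024 input channels.
merged = torch.cat([up, skip], dim=1)
print(merged.shape)  # torch.Size([1, 1024, 64, 64])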