praeclarumjj3 committed on
Commit
890231d
1 Parent(s): 2766341

Add documentation

Browse files
Files changed (3) hide show
  1. README.md +63 -0
  2. config.json +408 -0
  3. preprocessor_config.json +1939 -0
README.md CHANGED
@@ -14,3 +14,66 @@ widget:
14
  - src: https://praeclarumjj3.github.io/files/coco.jpeg
15
  example_title: Person
16
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  - src: https://praeclarumjj3.github.io/files/coco.jpeg
15
  example_title: Person
16
  ---
17
+
18
+ # OneFormer
19
+
20
+ OneFormer model trained on the ADE20k dataset (large-sized version, Swin backbone). It was introduced in the paper [OneFormer: One Transformer to Rule Universal Image Segmentation](https://arxiv.org/abs/2211.06220) by Jain et al. and first released in [this repository](https://github.com/SHI-Labs/OneFormer).
21
+
22
+ ![model image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/oneformer_teaser.png)
23
+
24
+ ## Model description
25
+
26
+ OneFormer is the first multi-task universal image segmentation framework based on transformers. OneFormer needs to be trained only once with a single universal architecture, a single model, and on a single dataset, to outperform existing frameworks across semantic, instance, and panoptic segmentation tasks. OneFormer uses a task token to condition the model on the task in focus, making the architecture task-guided for training, and task-dynamic for inference, all with a single model.
27
+
28
+ ![model image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/oneformer_architecture.png)
29
+
30
+ ## Intended uses & limitations
31
+
32
+ You can use this particular checkpoint for semantic, instance and panoptic segmentation. See the [model hub](https://huggingface.co/models?search=oneformer) to look for other versions fine-tuned on different datasets.
33
+
34
+ ### How to use
35
+
36
+ Here is how to use this model:
37
+
38
+ ```python
39
+ from transformers import OneFormerFeatureExtractor, OneFormerForUniversalSegmentation
40
+ from PIL import Image
41
+ import requests
42
+ url = "https://huggingface.co/datasets/shi-labs/oneformer_demo/resolve/main/ade20k.jpeg"
43
+ image = Image.open(requests.get(url, stream=True).raw)
44
+
45
+ # Loading a single model for all three tasks
46
+ feature_extractor = OneFormerFeatureExtractor.from_pretrained("shi-labs/oneformer_ade20k_swin_large")
47
+ model = OneFormerForUniversalSegmentation.from_pretrained("shi-labs/oneformer_ade20k_swin_large")
48
+
49
+ # Semantic Segmentation
50
+ semantic_inputs = feature_extractor(images=image, task_inputs=["semantic"], return_tensors="pt")
51
+ semantic_outputs = model(**semantic_inputs)
52
+ # pass through feature_extractor for postprocessing
53
+ predicted_semantic_map = feature_extractor.post_process_semantic_segmentation(semantic_outputs, target_sizes=[image.size[::-1]])[0]
54
+
55
+ # Instance Segmentation
56
+ instance_inputs = feature_extractor(images=image, task_inputs=["instance"], return_tensors="pt")
57
+ instance_outputs = model(**instance_inputs)
58
+ # pass through feature_extractor for postprocessing
59
+ predicted_instance_map = feature_extractor.post_process_instance_segmentation(instance_outputs, target_sizes=[image.size[::-1]])[0]["segmentation"]
60
+
61
+ # Panoptic Segmentation
62
+ panoptic_inputs = feature_extractor(images=image, task_inputs=["panoptic"], return_tensors="pt")
63
+ panoptic_outputs = model(**panoptic_inputs)
64
+ # pass through feature_extractor for postprocessing
65
+ predicted_panoptic_map = feature_extractor.post_process_panoptic_segmentation(panoptic_outputs, target_sizes=[image.size[::-1]])[0]["segmentation"]
66
+ ```
67
+
68
+ For more examples, please refer to the [documentation](https://huggingface.co/docs/transformers/main/en/model_doc/oneformer).
69
+
70
+ ### Citation
71
+
72
+ ```bibtex
73
+ @article{jain2022oneformer,
74
+ title={{OneFormer: One Transformer to Rule Universal Image Segmentation}},
75
+ author={Jitesh Jain and Jiachen Li and MangTik Chiu and Ali Hassani and Nikita Orlov and Humphrey Shi},
76
+ journal={arXiv},
77
+ year={2022}
78
+ }
79
+ ```
config.json ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OneFormerForUniversalSegmentation"
4
+ ],
5
+ "backbone_config": {
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "depths": [
8
+ 2,
9
+ 2,
10
+ 18,
11
+ 2
12
+ ],
13
+ "drop_path_rate": 0.3,
14
+ "embed_dim": 192,
15
+ "encoder_stride": 32,
16
+ "feature_channels": [
17
+ 192,
18
+ 384,
19
+ 768,
20
+ 1536
21
+ ],
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout_prob": 0.0,
24
+ "image_size": 384,
25
+ "mlp_ratio": 4.0,
26
+ "num_channels": 3,
27
+ "num_heads": [
28
+ 6,
29
+ 12,
30
+ 24,
31
+ 48
32
+ ],
33
+ "patch_norm": true,
34
+ "patch_size": 4,
35
+ "qkv_bias": true,
36
+ "strides": [
37
+ 4,
38
+ 8,
39
+ 16,
40
+ 32
41
+ ],
42
+ "use_absolute_embeddings": false,
43
+ "window_size": 12
44
+ },
45
+ "decoder_config": {
46
+ "common_stride": 4,
47
+ "conv_dim": 256,
48
+ "decoder_layers": 10,
49
+ "dim_feedforward": 2048,
50
+ "dropout": 0.1,
51
+ "encoder_feedforward_dim": 1024,
52
+ "encoder_layers": 6,
53
+ "enforce_input_proj": false,
54
+ "hidden_dim": 256,
55
+ "mask_dim": 256,
56
+ "norm": "GN",
57
+ "num_heads": 8,
58
+ "pre_norm": false,
59
+ "query_dec_layers": 2,
60
+ "use_task_norm": true
61
+ },
62
+ "general_config": {
63
+ "backbone_type": "swin",
64
+ "class_weight": 2.0,
65
+ "contrastive_temperature": 0.07,
66
+ "contrastive_weight": 0.5,
67
+ "deep_supervision": true,
68
+ "dice_weight": 5.0,
69
+ "ignore_value": 255,
70
+ "importance_sample_ratio": 0.75,
71
+ "init_std": 0.02,
72
+ "init_xavier_std": 1.0,
73
+ "is_train": false,
74
+ "layer_norm_eps": 1e-05,
75
+ "mask_weight": 5.0,
76
+ "no_object_weight": 0.1,
77
+ "num_classes": 150,
78
+ "num_queries": 250,
79
+ "output_auxiliary_logits": true,
80
+ "oversample_ratio": 3.0,
81
+ "train_num_points": 12544,
82
+ "use_auxiliary_loss": true
83
+ },
84
+ "hidden_size": 256,
85
+ "id2label": {
86
+ "0": "wall",
87
+ "1": "building",
88
+ "2": "sky",
89
+ "3": "floor",
90
+ "4": "tree",
91
+ "5": "ceiling",
92
+ "6": "road, route",
93
+ "7": "bed",
94
+ "8": "window ",
95
+ "9": "grass",
96
+ "10": "cabinet",
97
+ "11": "sidewalk, pavement",
98
+ "12": "person",
99
+ "13": "earth, ground",
100
+ "14": "door",
101
+ "15": "table",
102
+ "16": "mountain, mount",
103
+ "17": "plant",
104
+ "18": "curtain",
105
+ "19": "chair",
106
+ "20": "car",
107
+ "21": "water",
108
+ "22": "painting, picture",
109
+ "23": "sofa",
110
+ "24": "shelf",
111
+ "25": "house",
112
+ "26": "sea",
113
+ "27": "mirror",
114
+ "28": "rug",
115
+ "29": "field",
116
+ "30": "armchair",
117
+ "31": "seat",
118
+ "32": "fence",
119
+ "33": "desk",
120
+ "34": "rock, stone",
121
+ "35": "wardrobe, closet, press",
122
+ "36": "lamp",
123
+ "37": "tub",
124
+ "38": "rail",
125
+ "39": "cushion",
126
+ "40": "base, pedestal, stand",
127
+ "41": "box",
128
+ "42": "column, pillar",
129
+ "43": "signboard, sign",
130
+ "44": "chest of drawers, chest, bureau, dresser",
131
+ "45": "counter",
132
+ "46": "sand",
133
+ "47": "sink",
134
+ "48": "skyscraper",
135
+ "49": "fireplace",
136
+ "50": "refrigerator, icebox",
137
+ "51": "grandstand, covered stand",
138
+ "52": "path",
139
+ "53": "stairs",
140
+ "54": "runway",
141
+ "55": "case, display case, showcase, vitrine",
142
+ "56": "pool table, billiard table, snooker table",
143
+ "57": "pillow",
144
+ "58": "screen door, screen",
145
+ "59": "stairway, staircase",
146
+ "60": "river",
147
+ "61": "bridge, span",
148
+ "62": "bookcase",
149
+ "63": "blind, screen",
150
+ "64": "coffee table",
151
+ "65": "toilet, can, commode, crapper, pot, potty, stool, throne",
152
+ "66": "flower",
153
+ "67": "book",
154
+ "68": "hill",
155
+ "69": "bench",
156
+ "70": "countertop",
157
+ "71": "stove",
158
+ "72": "palm, palm tree",
159
+ "73": "kitchen island",
160
+ "74": "computer",
161
+ "75": "swivel chair",
162
+ "76": "boat",
163
+ "77": "bar",
164
+ "78": "arcade machine",
165
+ "79": "hovel, hut, hutch, shack, shanty",
166
+ "80": "bus",
167
+ "81": "towel",
168
+ "82": "light",
169
+ "83": "truck",
170
+ "84": "tower",
171
+ "85": "chandelier",
172
+ "86": "awning, sunshade, sunblind",
173
+ "87": "street lamp",
174
+ "88": "booth",
175
+ "89": "tv",
176
+ "90": "plane",
177
+ "91": "dirt track",
178
+ "92": "clothes",
179
+ "93": "pole",
180
+ "94": "land, ground, soil",
181
+ "95": "bannister, banister, balustrade, balusters, handrail",
182
+ "96": "escalator, moving staircase, moving stairway",
183
+ "97": "ottoman, pouf, pouffe, puff, hassock",
184
+ "98": "bottle",
185
+ "99": "buffet, counter, sideboard",
186
+ "100": "poster, posting, placard, notice, bill, card",
187
+ "101": "stage",
188
+ "102": "van",
189
+ "103": "ship",
190
+ "104": "fountain",
191
+ "105": "conveyer belt, conveyor belt, conveyer, conveyor, transporter",
192
+ "106": "canopy",
193
+ "107": "washer, automatic washer, washing machine",
194
+ "108": "plaything, toy",
195
+ "109": "pool",
196
+ "110": "stool",
197
+ "111": "barrel, cask",
198
+ "112": "basket, handbasket",
199
+ "113": "falls",
200
+ "114": "tent",
201
+ "115": "bag",
202
+ "116": "minibike, motorbike",
203
+ "117": "cradle",
204
+ "118": "oven",
205
+ "119": "ball",
206
+ "120": "food, solid food",
207
+ "121": "step, stair",
208
+ "122": "tank, storage tank",
209
+ "123": "trade name",
210
+ "124": "microwave",
211
+ "125": "pot",
212
+ "126": "animal",
213
+ "127": "bicycle",
214
+ "128": "lake",
215
+ "129": "dishwasher",
216
+ "130": "screen",
217
+ "131": "blanket, cover",
218
+ "132": "sculpture",
219
+ "133": "hood, exhaust hood",
220
+ "134": "sconce",
221
+ "135": "vase",
222
+ "136": "traffic light",
223
+ "137": "tray",
224
+ "138": "trash can",
225
+ "139": "fan",
226
+ "140": "pier",
227
+ "141": "crt screen",
228
+ "142": "plate",
229
+ "143": "monitor",
230
+ "144": "bulletin board",
231
+ "145": "shower",
232
+ "146": "radiator",
233
+ "147": "glass, drinking glass",
234
+ "148": "clock",
235
+ "149": "flag"
236
+ },
237
+ "init_std": 0.02,
238
+ "init_xavier_std": 1.0,
239
+ "label2id": {
240
+ "animal": 126,
241
+ "arcade machine": 78,
242
+ "armchair": 30,
243
+ "awning, sunshade, sunblind": 86,
244
+ "bag": 115,
245
+ "ball": 119,
246
+ "bannister, banister, balustrade, balusters, handrail": 95,
247
+ "bar": 77,
248
+ "barrel, cask": 111,
249
+ "base, pedestal, stand": 40,
250
+ "basket, handbasket": 112,
251
+ "bed": 7,
252
+ "bench": 69,
253
+ "bicycle": 127,
254
+ "blanket, cover": 131,
255
+ "blind, screen": 63,
256
+ "boat": 76,
257
+ "book": 67,
258
+ "bookcase": 62,
259
+ "booth": 88,
260
+ "bottle": 98,
261
+ "box": 41,
262
+ "bridge, span": 61,
263
+ "buffet, counter, sideboard": 99,
264
+ "building": 1,
265
+ "bulletin board": 144,
266
+ "bus": 80,
267
+ "cabinet": 10,
268
+ "canopy": 106,
269
+ "car": 20,
270
+ "case, display case, showcase, vitrine": 55,
271
+ "ceiling": 5,
272
+ "chair": 19,
273
+ "chandelier": 85,
274
+ "chest of drawers, chest, bureau, dresser": 44,
275
+ "clock": 148,
276
+ "clothes": 92,
277
+ "coffee table": 64,
278
+ "column, pillar": 42,
279
+ "computer": 74,
280
+ "conveyer belt, conveyor belt, conveyer, conveyor, transporter": 105,
281
+ "counter": 45,
282
+ "countertop": 70,
283
+ "cradle": 117,
284
+ "crt screen": 141,
285
+ "curtain": 18,
286
+ "cushion": 39,
287
+ "desk": 33,
288
+ "dirt track": 91,
289
+ "dishwasher": 129,
290
+ "door": 14,
291
+ "earth, ground": 13,
292
+ "escalator, moving staircase, moving stairway": 96,
293
+ "falls": 113,
294
+ "fan": 139,
295
+ "fence": 32,
296
+ "field": 29,
297
+ "fireplace": 49,
298
+ "flag": 149,
299
+ "floor": 3,
300
+ "flower": 66,
301
+ "food, solid food": 120,
302
+ "fountain": 104,
303
+ "glass, drinking glass": 147,
304
+ "grandstand, covered stand": 51,
305
+ "grass": 9,
306
+ "hill": 68,
307
+ "hood, exhaust hood": 133,
308
+ "house": 25,
309
+ "hovel, hut, hutch, shack, shanty": 79,
310
+ "kitchen island": 73,
311
+ "lake": 128,
312
+ "lamp": 36,
313
+ "land, ground, soil": 94,
314
+ "light": 82,
315
+ "microwave": 124,
316
+ "minibike, motorbike": 116,
317
+ "mirror": 27,
318
+ "monitor": 143,
319
+ "mountain, mount": 16,
320
+ "ottoman, pouf, pouffe, puff, hassock": 97,
321
+ "oven": 118,
322
+ "painting, picture": 22,
323
+ "palm, palm tree": 72,
324
+ "path": 52,
325
+ "person": 12,
326
+ "pier": 140,
327
+ "pillow": 57,
328
+ "plane": 90,
329
+ "plant": 17,
330
+ "plate": 142,
331
+ "plaything, toy": 108,
332
+ "pole": 93,
333
+ "pool": 109,
334
+ "pool table, billiard table, snooker table": 56,
335
+ "poster, posting, placard, notice, bill, card": 100,
336
+ "pot": 125,
337
+ "radiator": 146,
338
+ "rail": 38,
339
+ "refrigerator, icebox": 50,
340
+ "river": 60,
341
+ "road, route": 6,
342
+ "rock, stone": 34,
343
+ "rug": 28,
344
+ "runway": 54,
345
+ "sand": 46,
346
+ "sconce": 134,
347
+ "screen": 130,
348
+ "screen door, screen": 58,
349
+ "sculpture": 132,
350
+ "sea": 26,
351
+ "seat": 31,
352
+ "shelf": 24,
353
+ "ship": 103,
354
+ "shower": 145,
355
+ "sidewalk, pavement": 11,
356
+ "signboard, sign": 43,
357
+ "sink": 47,
358
+ "sky": 2,
359
+ "skyscraper": 48,
360
+ "sofa": 23,
361
+ "stage": 101,
362
+ "stairs": 53,
363
+ "stairway, staircase": 59,
364
+ "step, stair": 121,
365
+ "stool": 110,
366
+ "stove": 71,
367
+ "street lamp": 87,
368
+ "swivel chair": 75,
369
+ "table": 15,
370
+ "tank, storage tank": 122,
371
+ "tent": 114,
372
+ "toilet, can, commode, crapper, pot, potty, stool, throne": 65,
373
+ "towel": 81,
374
+ "tower": 84,
375
+ "trade name": 123,
376
+ "traffic light": 136,
377
+ "trash can": 138,
378
+ "tray": 137,
379
+ "tree": 4,
380
+ "truck": 83,
381
+ "tub": 37,
382
+ "tv": 89,
383
+ "van": 102,
384
+ "vase": 135,
385
+ "wall": 0,
386
+ "wardrobe, closet, press": 35,
387
+ "washer, automatic washer, washing machine": 107,
388
+ "water": 21,
389
+ "window ": 8
390
+ },
391
+ "model_type": "oneformer",
392
+ "num_attention_heads": 8,
393
+ "num_hidden_layers": 10,
394
+ "output_attentions": true,
395
+ "output_hidden_states": true,
396
+ "text_encoder_config": {
397
+ "max_seq_len": 77,
398
+ "task_seq_len": 77,
399
+ "text_encoder_context_length": 77,
400
+ "text_encoder_n_ctx": 16,
401
+ "text_encoder_num_layers": 6,
402
+ "text_encoder_proj_layers": 2,
403
+ "text_encoder_vocab_size": 49408,
404
+ "text_encoder_width": 256
405
+ },
406
+ "torch_dtype": "float32",
407
+ "transformers_version": "4.25.0.dev0"
408
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,1939 @@