Upload model
- config.json +26 -0
- configuration_vitmodel.py +39 -0
- modeling_vitmodel.py +16 -0
- pytorch_model.bin +3 -0
config.json
ADDED
@@ -0,0 +1,26 @@
{
  "architectures": [
    "VitMemModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_vitmodel.ViTConfig",
    "AutoModel": "modeling_vitmodel.VitMemModel"
  },
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.30.1"
}
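The "auto_map" entry is what wires this config to the custom code files added below: loaded with trust_remote_code=True, AutoConfig resolves to configuration_vitmodel.ViTConfig and AutoModel to modeling_vitmodel.VitMemModel. A minimal loading sketch; "user/vit-mem-model" is a placeholder, not the actual repository name:

from transformers import AutoConfig, AutoModel

# Hypothetical repo id; substitute the real one. trust_remote_code=True is
# required because auto_map points at custom code shipped with the repo.
config = AutoConfig.from_pretrained("user/vit-mem-model", trust_remote_code=True)
model = AutoModel.from_pretrained("user/vit-mem-model", trust_remote_code=True)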
configuration_vitmodel.py
ADDED
@@ -0,0 +1,39 @@
from transformers import PretrainedConfig


class ViTConfig(PretrainedConfig):
    model_type = "vit"

    def __init__(
        self,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        image_size=224,
        patch_size=16,
        num_channels=3,
        qkv_bias=True,
        encoder_stride=16,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.qkv_bias = qkv_bias
        self.encoder_stride = encoder_stride
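The defaults above mirror the values in config.json. A short sanity-check sketch, assuming it is run from a local checkout of this repo so the module imports directly:

from configuration_vitmodel import ViTConfig

config = ViTConfig()  # defaults match config.json in this commit
assert config.hidden_size == 768 and config.patch_size == 16
config.save_pretrained("./out")  # writes a config.json with these hyperparameters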
modeling_vitmodel.py
ADDED
@@ -0,0 +1,16 @@
import torch
import timm
from transformers import PreTrainedModel

from .configuration_vitmodel import ViTConfig


class VitMemModel(PreTrainedModel):
    config_class = ViTConfig

    def __init__(self, config: ViTConfig):
        super().__init__(config)
        # timm ViT-B/16 (MIIL variant) with a single-logit head; pretrained=False
        # because the weights are loaded from pytorch_model.bin in this repo.
        self.model = timm.create_model(
            "vit_base_patch16_224_miil", pretrained=False, num_classes=1
        )

    def forward(self, tensor, labels=None):
        # labels is accepted for interface compatibility but unused here.
        vitfeat = self.model(tensor)
        # Squash the single logit to a score in [0, 1].
        out = torch.sigmoid(vitfeat)
        return out
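A forward-pass sketch for the model above (placeholder repo id again): it takes a batch of 224x224 RGB tensors and returns one sigmoid score per image:

import torch
from transformers import AutoModel

# Hypothetical repo id; trust_remote_code pulls in modeling_vitmodel.py.
model = AutoModel.from_pretrained("user/vit-mem-model", trust_remote_code=True).eval()
pixels = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image, per the config
with torch.no_grad():
    score = model(pixels)  # shape (1, 1); sigmoid keeps values in [0, 1]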
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:acb63c96d24c2ca7347982300c11360053a3f93b64dfa9b18057e0c516aef6dc
size 343130109
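This file is a Git LFS pointer, not the weights themselves; the sha256 oid and the size (roughly 343 MB) identify the actual checkpoint, which is resolved on download. One way to fetch it, with the same placeholder repo id:

from huggingface_hub import hf_hub_download

# Resolves the LFS pointer above to the real weight file and returns its local path.
weights_path = hf_hub_download("user/vit-mem-model", "pytorch_model.bin")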