Upload model
Browse files
- config.json +2 -2
- configuration_bionexttager.py +1 -1
- modeling_bionexttagger.py +5 -31
config.json
CHANGED
@@ -7,7 +7,7 @@
|
|
7 |
"attention_probs_dropout_prob": 0.1,
|
8 |
"augmentation": "unk",
|
9 |
"auto_map": {
|
10 |
-
"AutoConfig": "modeling_bionexttagger.BioNextTaggerConfig",
|
11 |
"AutoModel": "modeling_bionexttagger.BioNextTaggerModel"
|
12 |
},
|
13 |
"classifier_dropout": null,
|
@@ -64,6 +64,6 @@
|
|
64 |
"transformers_version": "4.37.2",
|
65 |
"type_vocab_size": 2,
|
66 |
"use_cache": true,
|
67 |
-
"version": "0.1.1",
|
68 |
"vocab_size": 28895
|
69 |
}
|
|
|
7 |
"attention_probs_dropout_prob": 0.1,
|
8 |
"augmentation": "unk",
|
9 |
"auto_map": {
|
10 |
+
"AutoConfig": "configuration_bionexttager.BioNextTaggerConfig",
|
11 |
"AutoModel": "modeling_bionexttagger.BioNextTaggerModel"
|
12 |
},
|
13 |
"classifier_dropout": null,
|
|
|
64 |
"transformers_version": "4.37.2",
|
65 |
"type_vocab_size": 2,
|
66 |
"use_cache": true,
|
67 |
+
"version": "0.1.2",
|
68 |
"vocab_size": 28895
|
69 |
}
|
configuration_bionexttager.py
CHANGED
@@ -13,7 +13,7 @@ class BioNextTaggerConfig(PretrainedConfig):
|
|
13 |
percentage_tags = 0.2,
|
14 |
p_augmentation = 0.5,
|
15 |
crf_reduction = "mean",
|
16 |
-
version="0.1.1",
|
17 |
**kwargs,
|
18 |
):
|
19 |
self.version = version
|
|
|
13 |
percentage_tags = 0.2,
|
14 |
p_augmentation = 0.5,
|
15 |
crf_reduction = "mean",
|
16 |
+
version="0.1.2",
|
17 |
**kwargs,
|
18 |
):
|
19 |
self.version = version
|
modeling_bionexttagger.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
|
2 |
import os
|
3 |
from typing import Optional, Union
|
4 |
-
from transformers import AutoModel, PreTrainedModel, AutoConfig, BertModel
|
5 |
from transformers.modeling_outputs import TokenClassifierOutput
|
6 |
from torch import nn
|
7 |
from torch.nn import CrossEntropyLoss
|
@@ -10,35 +10,8 @@ from typing import List, Optional
|
|
10 |
|
11 |
import torch
|
12 |
from itertools import islice
|
|
|
13 |
|
14 |
-
class BioNextTaggerConfig(PretrainedConfig):
|
15 |
-
model_type = "crf-tagger"
|
16 |
-
|
17 |
-
def __init__(
|
18 |
-
self,
|
19 |
-
augmentation = "unk",
|
20 |
-
context_size = 64,
|
21 |
-
percentage_tags = 0.2,
|
22 |
-
p_augmentation = 0.5,
|
23 |
-
crf_reduction = "mean",
|
24 |
-
version="0.1.1",
|
25 |
-
**kwargs,
|
26 |
-
):
|
27 |
-
self.version = version
|
28 |
-
self.augmentation = augmentation
|
29 |
-
self.context_size = context_size
|
30 |
-
self.percentage_tags = percentage_tags
|
31 |
-
self.p_augmentation = p_augmentation
|
32 |
-
self.crf_reduction = crf_reduction
|
33 |
-
super().__init__(**kwargs)
|
34 |
-
|
35 |
-
def get_backbonemodel_config(self):
|
36 |
-
backbonemodel_cfg = AutoConfig.from_pretrained(self._name_or_path)#.to_dict()
|
37 |
-
for k in backbonemodel_cfg.to_dict():
|
38 |
-
if hasattr(self, k):
|
39 |
-
setattr(backbonemodel_cfg,k, getattr(self,k))
|
40 |
-
|
41 |
-
return backbonemodel_cfg
|
42 |
|
43 |
NUM_PER_LAYER = 16
|
44 |
|
@@ -49,13 +22,14 @@ class BioNextTaggerModel(PreTrainedModel):
|
|
49 |
def __init__(self, config):
|
50 |
super().__init__(config)
|
51 |
self.num_labels = config.num_labels
|
52 |
-
|
53 |
self.bert = BertModel(config.get_backbonemodel_config(), add_pooling_layer=False)
|
54 |
#AutoModel.from_pretrained(config._name_or_path,
|
55 |
# config=config.get_backbonemodel_config(),
|
56 |
# add_pooling_layer=False)
|
|
|
57 |
# self.vocab_size = config.vocab_size
|
58 |
-
classifier_dropout = (config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob)
|
59 |
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
60 |
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
61 |
self.dense_activation = nn.GELU(approximate='none')
|
|
|
1 |
|
2 |
import os
|
3 |
from typing import Optional, Union
|
4 |
+
from transformers import AutoModel, PreTrainedModel, AutoConfig, BertModel
|
5 |
from transformers.modeling_outputs import TokenClassifierOutput
|
6 |
from torch import nn
|
7 |
from torch.nn import CrossEntropyLoss
|
|
|
10 |
|
11 |
import torch
|
12 |
from itertools import islice
|
13 |
+
from .configuration_bionexttager import BioNextTaggerConfig
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
NUM_PER_LAYER = 16
|
17 |
|
|
|
22 |
def __init__(self, config):
|
23 |
super().__init__(config)
|
24 |
self.num_labels = config.num_labels
|
25 |
+
print("LOAD BERT?", flush=True)
|
26 |
self.bert = BertModel(config.get_backbonemodel_config(), add_pooling_layer=False)
|
27 |
#AutoModel.from_pretrained(config._name_or_path,
|
28 |
# config=config.get_backbonemodel_config(),
|
29 |
# add_pooling_layer=False)
|
30 |
+
print("LOADED BERT", flush=True)
|
31 |
# self.vocab_size = config.vocab_size
|
32 |
+
#classifier_dropout = (config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob)
|
33 |
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
34 |
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
35 |
self.dense_activation = nn.GELU(approximate='none')
|