3v324v23 committed
Commit e77f999 · 1 Parent(s): 68eff11

update model name
modeling/config.py CHANGED
@@ -10,7 +10,7 @@ class DebertaConfig(PretrainedConfig):
     model_type = 'deberta-v2'
 
     def __init__(self,
-                 vocab_size_or_config_json_file,
+                 vocab_size=22669,
                  hidden_size=768,
                  num_hidden_layers=12,
                  num_attention_heads=12,
@@ -28,7 +28,8 @@ class DebertaConfig(PretrainedConfig):
                  ffn_type=0,
                  label_smoothing=None,
                  num_qkv=0,
-                 seg_emb=False):
+                 seg_emb=False,
+                 **kwargs):
         """Constructs BertConfig.
 
         Args:
@@ -53,35 +54,26 @@ class DebertaConfig(PretrainedConfig):
             initializer_range: The stddev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
-        if isinstance(vocab_size_or_config_json_file, str):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
-            self.hidden_act = hidden_act
-            self.intermediate_size = intermediate_size
-            self.hidden_dropout_prob = hidden_dropout_prob
-            self.attention_probs_dropout_prob = attention_probs_dropout_prob
-            self.max_position_embeddings = max_position_embeddings
-            self.type_vocab_size = type_vocab_size
-            self.relax_projection = relax_projection
-            self.new_pos_ids = new_pos_ids
-            self.initializer_range = initializer_range
-            self.task_idx = task_idx
-            self.fp32_embedding = fp32_embedding
-            self.ffn_type = ffn_type
-            self.label_smoothing = label_smoothing
-            self.num_qkv = num_qkv
-            self.seg_emb = seg_emb
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
-
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.relax_projection = relax_projection
+        self.new_pos_ids = new_pos_ids
+        self.initializer_range = initializer_range
+        self.task_idx = task_idx
+        self.fp32_embedding = fp32_embedding
+        self.ffn_type = ffn_type
+        self.label_smoothing = label_smoothing
+        self.num_qkv = num_qkv
+        self.seg_emb = seg_emb
+        super().__init__(**kwargs)
     # @classmethod
     # def from_dict(cls, json_object):
     #     """Constructs a `BertConfig` from a Python dictionary of parameters."""
modeling/deberta.py CHANGED
@@ -9,14 +9,25 @@
 
 import copy
 import torch
+from transformers.configuration_utils import PretrainedConfig
 from .ops import *
 from .bert import *
-from .config import ModelConfig
+from .config import DebertaConfig
 from .cache_utils import load_model_state
+from transformers import PreTrainedModel
 
 __all__ = ['DeBERTa']
 
-class DeBERTa(torch.nn.Module):
+
+class DebertaPretrainedModel(PreTrainedModel):
+    config_class = DebertaConfig
+    base_model_prefix = 'deberta'
+
+    def __init__(self, config: PretrainedConfig, *inputs, **kwargs):
+        super().__init__(config, *inputs, **kwargs)
+
+
+class DeBERTa(DebertaPretrainedModel):
     """ DeBERTa encoder
     This module is composed of the input embedding layer with stacked transformer layers with disentangled attention.
 
@@ -31,8 +42,8 @@ class DeBERTa(torch.nn.Module):
 
     """
 
-    def __init__(self, config=None, pre_trained=None):
-        super().__init__()
+    def __init__(self, config, pre_trained=None):
+        super().__init__(config)
         state = None
         if pre_trained is not None:
             state, model_config = load_model_state(pre_trained)
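Introducing DebertaPretrainedModel with config_class and base_model_prefix, and re-parenting DeBERTa onto it instead of torch.nn.Module, is what hooks the model into the generic transformers loading machinery. A hedged sketch of both construction paths, reusing the hypothetical ./my-deberta directory from above (assumed to contain config.json and pytorch_model.bin):

from modeling.config import DebertaConfig
from modeling.deberta import DeBERTa

# Explicit construction: build a config, then the model.
config = DebertaConfig.from_pretrained('./my-deberta')
model = DeBERTa(config)

# Generic construction: PreTrainedModel.from_pretrained reads config.json,
# instantiates the model via config_class, and loads pytorch_model.bin.
model = DeBERTa.from_pretrained('./my-deberta')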
pytorch.model-500000.bin → pytorch_model.bin RENAMED
File without changes
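The rename matters because transformers resolves PyTorch weights by a fixed filename rather than by pattern: from_pretrained looks for pytorch_model.bin in a checkpoint directory, so the step-numbered pytorch.model-500000.bin would never be found. A quick check, assuming a reasonably recent transformers release where the constant is exposed in transformers.utils:

from transformers.utils import WEIGHTS_NAME

print(WEIGHTS_NAME)  # 'pytorch_model.bin' -- the filename from_pretrained expects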