update model name

Browse files

Files changed (3) hide show

modeling/config.py +23 -31
modeling/deberta.py +15 -4
pytorch.model-500000.bin → pytorch_model.bin +0 -0

modeling/config.py CHANGED Viewed

@@ -10,7 +10,7 @@ class DebertaConfig(PretrainedConfig):
     model_type = 'deberta-v2'
     def __init__(self,
-                 vocab_size_or_config_json_file,
                  hidden_size=768,
                  num_hidden_layers=12,
                  num_attention_heads=12,
@@ -28,7 +28,8 @@ class DebertaConfig(PretrainedConfig):
                  ffn_type=0,
                  label_smoothing=None,
                  num_qkv=0,
-                 seg_emb=False):
         """Constructs BertConfig.
         Args:
@@ -53,35 +54,26 @@ class DebertaConfig(PretrainedConfig):
             initializer_range: The stddev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
-        if isinstance(vocab_size_or_config_json_file, str):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.vocab_size = vocab_size_or_config_json_file
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
-            self.hidden_act = hidden_act
-            self.intermediate_size = intermediate_size
-            self.hidden_dropout_prob = hidden_dropout_prob
-            self.attention_probs_dropout_prob = attention_probs_dropout_prob
-            self.max_position_embeddings = max_position_embeddings
-            self.type_vocab_size = type_vocab_size
-            self.relax_projection = relax_projection
-            self.new_pos_ids = new_pos_ids
-            self.initializer_range = initializer_range
-            self.task_idx = task_idx
-            self.fp32_embedding = fp32_embedding
-            self.ffn_type = ffn_type
-            self.label_smoothing = label_smoothing
-            self.num_qkv = num_qkv
-            self.seg_emb = seg_emb
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             "or the path to a pretrained model config file (str)")
     # @classmethod
     # def from_dict(cls, json_object):
     #     """Constructs a `BertConfig` from a Python dictionary of parameters."""

     model_type = 'deberta-v2'
     def __init__(self,
+                 vocab_size=22669,
                  hidden_size=768,
                  num_hidden_layers=12,
                  num_attention_heads=12,
                  ffn_type=0,
                  label_smoothing=None,
                  num_qkv=0,
+                 seg_emb=False,
+                 **kwargs):
         """Constructs BertConfig.
         Args:
             initializer_range: The stddev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.relax_projection = relax_projection
+        self.new_pos_ids = new_pos_ids
+        self.initializer_range = initializer_range
+        self.task_idx = task_idx
+        self.fp32_embedding = fp32_embedding
+        self.ffn_type = ffn_type
+        self.label_smoothing = label_smoothing
+        self.num_qkv = num_qkv
+        self.seg_emb = seg_emb
+        super().__init__(**kwargs)
     # @classmethod
     # def from_dict(cls, json_object):
     #     """Constructs a `BertConfig` from a Python dictionary of parameters."""

modeling/deberta.py CHANGED Viewed

@@ -9,14 +9,25 @@
 import copy
 import torch
 from .ops import *
 from .bert import *
-from .config import ModelConfig
 from .cache_utils import load_model_state
 __all__ = ['DeBERTa']
-class DeBERTa(torch.nn.Module):
   """ DeBERTa encoder
   This module is composed of the input embedding layer with stacked transformer layers with disentangled attention.
@@ -31,8 +42,8 @@ class DeBERTa(torch.nn.Module):
   """
-  def __init__(self, config=None, pre_trained=None):
-    super().__init__()
     state = None
     if pre_trained is not None:
       state, model_config = load_model_state(pre_trained)

 import copy
 import torch
+from transformers.configuration_utils import PretrainedConfig
 from .ops import *
 from .bert import *
+from .config import DebertaConfig
 from .cache_utils import load_model_state
+from transformers import PreTrainedModel
 __all__ = ['DeBERTa']
+class DebertaPretrainedModel(PreTrainedModel):
+  config_class = DebertaConfig
+  base_model_prefix = 'deberta'
+  def __init__(self, config: PretrainedConfig, *inputs, **kwargs):
+    super().__init__(config, *inputs, **kwargs)
+class DeBERTa(DebertaPretrainedModel):
   """ DeBERTa encoder
   This module is composed of the input embedding layer with stacked transformer layers with disentangled attention.
   """
+  def __init__(self, config, pre_trained=None):
+    super().__init__(config)
     state = None
     if pre_trained is not None:
       state, model_config = load_model_state(pre_trained)

pytorch.model-500000.bin → pytorch_model.bin RENAMED Viewed

File without changes