Text Generation · Transformers · PyTorch · TensorBoard · Arabic · aragpt2 · custom_code
wissamantoun committed
Commit c4b0d7f
1 Parent(s): ea715ce

Update model configuration and mappings

Files changed (3):
  1. config.json +1 -1
  2. configuration_aragpt2.py +5 -5
  3. modeling_aragpt2.py +22 -37
config.json CHANGED
@@ -5,7 +5,7 @@
   ],
   "auto_map": {
     "AutoConfig": "configuration_aragpt2.AraGPT2Config",
-    "AutoForCausalLM": "modeling_aragpt2.AraGPT2ForCausalLM",
+    "AutoModelForCausalLM": "modeling_aragpt2.AraGPT2LMHeadModel",
     "AutoModel": "modeling_aragpt2.AraGPT2Model"
   },
   "attention_probs_dropout_prob": 0.1,
configuration_aragpt2.py CHANGED
@@ -1,5 +1,5 @@
 # coding=utf-8
-""" AraAraGPT2 configuration"""
+""" AraGPT2 configuration"""
 from collections import OrderedDict
 from typing import Any, List, Mapping, Optional
 
@@ -18,7 +18,7 @@ AraGPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 
 class AraGPT2Config(PretrainedConfig):
     """
-    This is the configuration class to store the configuration of a [`AraAraGPT2Model`] or a [`TFAraAraGPT2Model`]. It is used to
+    This is the configuration class to store the configuration of a [`AraGPT2Model`] or a [`TFAraGPT2Model`]. It is used to
     instantiate a AraGPT2 model according to the specified arguments, defining the model architecture. Instantiating a
     configuration with the defaults will yield a similar configuration to that of the AraGPT2
     [aubmindlab/aragpt2-mega](https://huggingface.co/aubmindlab/aragpt2-mega) architecture.
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
         n_layer=12,
         n_head=12,
         n_inner=None,
-        activation_function="gelu_new",
+        activation_function="gelu",
         resid_pdrop=0.1,
         embd_pdrop=0.1,
         attn_pdrop=0.1,
@@ -144,8 +144,8 @@ class AraGPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         scale_attn_weights=True,
         use_cache=True,
-        bos_token_id=50256,
-        eos_token_id=50256,
+        bos_token_id=0,
+        eos_token_id=0,
         scale_attn_by_inverse_layer_idx=False,
         reorder_and_upcast_attn=False,
         **kwargs,
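These default changes only take effect when an AraGPT2Config is constructed in code rather than loaded from the repo's config.json (which carries explicit values). The old ids 50256 were inherited from OpenAI GPT-2, where 50256 is the <|endoftext|> token; id 0 presumably corresponds to the equivalent token in AraGPT2's own vocabulary. A quick sketch of the post-commit defaults; the assertions restate the diff, and the import path assumes the file is used as a local module:

from configuration_aragpt2 import AraGPT2Config

config = AraGPT2Config()  # defaults after this commit
assert config.activation_function == "gelu"  # previously "gelu_new"
assert config.bos_token_id == 0              # previously 50256 (GPT-2's <|endoftext|> id)
assert config.eos_token_id == 0              # previously 50256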
modeling_aragpt2.py CHANGED
@@ -1,19 +1,5 @@
 # coding=utf-8
-# Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""PyTorch OpenAI GPT-2 model."""
+"""PyTorch AraGPT2 model."""
 
 import math
 import os
@@ -59,16 +45,12 @@ _CHECKPOINT_FOR_DOC = "aubmindlab/aragpt2-mega"
 _CONFIG_FOR_DOC = "AraGPT2Config"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 
-GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
+ARAGPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "aubmindlab/aragpt2-mega",
-    "gpt2-medium",
-    "aubmindlab/aragpt2-mega",
-    "aubmindlab/aragpt2-mega",
-    "distilgpt2",
-    # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
+    # See all AraGPT2 models at https://huggingface.co/models?filter=aragpt2
 ]
 
-_GPT2_ML_TF_TO_TORCH = {
+_ARAGPT2_ML_TF_TO_TORCH = {
     "LayerNorm_embed_norm": "emb_norm",
     "pos_embed": "wpe.weight",
     "word_embed": "wte.weight",
@@ -89,19 +71,22 @@ _GPT2_ML_TF_TO_TORCH = {
     "bias": "bias",
 }
 
+WEIGHTS_NAME = "pytorch_model.bin"
+CONFIG_NAME = "config.json"
+
 
 def convert_gpt2_checkpoint_to_pytorch(
-    gpt2_checkpoint_path, gpt2_config_file, pytorch_dump_folder_path
+    aragpt2_checkpoint_path, aragpt2_config_file, pytorch_dump_folder_path
 ):
     # Construct model
-    if gpt2_config_file == "":
+    if aragpt2_config_file == "":
         config = AraGPT2Config()
     else:
-        config = AraGPT2Config.from_json_file(gpt2_config_file)
+        config = AraGPT2Config.from_json_file(aragpt2_config_file)
     model = AraGPT2Model(config)
 
     # Load weights from numpy
-    load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path)
+    load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path)
 
     # Save pytorch-model
     pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
@@ -115,7 +100,7 @@ def convert_gpt2_checkpoint_to_pytorch(
 
 # XXX: MUST do like: convert_gpt2_checkpoint_to_pytorch('./model.ckpt-100000', './mega.json', './')
 # https://github.com/tensorflow/models/issues/2675#issuecomment-516595597
-def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
+def load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path):
     """Load tf checkpoints in a pytorch model"""
     try:
         import re
@@ -126,7 +111,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
             "https://www.tensorflow.org/install/ for installation instructions."
         )
         raise
-    tf_path = os.path.abspath(gpt2_checkpoint_path)
+    tf_path = os.path.abspath(aragpt2_checkpoint_path)
     logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
@@ -157,13 +142,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
 
         if sname == "" or sname == "embeddings":
             continue
-        elif sname not in _GPT2_ML_TF_TO_TORCH:
+        elif sname not in _ARAGPT2_ML_TF_TO_TORCH:
            print("=========================================================")
            logger.info("Skip var name {}".format(scope_names))
            pointer = None
            break
        else:
-            tname = _GPT2_ML_TF_TO_TORCH[sname]
+            tname = _ARAGPT2_ML_TF_TO_TORCH[sname]
            if "." in tname:
                parent, child = tname.split(".")
                pointer = getattr(pointer, parent)
@@ -602,7 +587,7 @@ class AraGPT2PreTrainedModel(PreTrainedModel):
     """
 
     config_class = AraGPT2Config
-    load_tf_weights = load_tf_weights_in_gpt2
+    load_tf_weights = load_tf_weights_in_aragpt2
     base_model_prefix = "transformer"
     is_parallelizable = True
     supports_gradient_checkpointing = True
@@ -828,7 +813,7 @@ class AraGPT2Model(AraGPT2PreTrainedModel):
     _keys_to_ignore_on_load_unexpected = ["attn.masked_bias"]
     _keys_to_ignore_on_load_missing = ["attn.masked_bias"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
 
         self.embed_dim = config.hidden_size
@@ -1177,7 +1162,7 @@ class AraGPT2LMHeadModel(AraGPT2PreTrainedModel):
     ]
     _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
         self.transformer = AraGPT2Model(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
@@ -1399,7 +1384,7 @@ class AraGPT2DoubleHeadsModel(AraGPT2PreTrainedModel):
     ]
     _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
         config.num_labels = 1
         self.transformer = AraGPT2Model(config)
@@ -1653,7 +1638,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
     ]
     _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = AraGPT2Model(config)
@@ -1789,7 +1774,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
     AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
         self.num_labels = config.num_labels
 
@@ -1890,7 +1875,7 @@ class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
     AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForQuestionAnswering(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
         super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = AraGPT2Model(config)
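Taken together, the renames keep the TF-to-PyTorch conversion path self-consistent: AraGPT2PreTrainedModel.load_tf_weights now points at the renamed load_tf_weights_in_aragpt2, and the module defines its own WEIGHTS_NAME/CONFIG_NAME constants. A usage sketch that follows the in-file XXX comment (the paths are that comment's example values, not real files):

from modeling_aragpt2 import convert_gpt2_checkpoint_to_pytorch

# Per the comment above load_tf_weights_in_aragpt2, pass the TF checkpoint
# prefix (model.ckpt-100000), not a directory, plus the JSON config file:
convert_gpt2_checkpoint_to_pytorch(
    aragpt2_checkpoint_path="./model.ckpt-100000",
    aragpt2_config_file="./mega.json",
    pytorch_dump_folder_path="./",
)
# Writes ./pytorch_model.bin (WEIGHTS_NAME); the config is presumably
# saved alongside it as ./config.json (CONFIG_NAME).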