
davda54 committed on
Commit cfe44fe · 1 Parent(s): 8e47ceb

Update configuration_ltgbert.py

Files changed (1)
  1. configuration_ltgbert.py +77 -4
configuration_ltgbert.py CHANGED
@@ -1,12 +1,82 @@
+# coding=utf-8
+# Copyright 2023 Language Technology Group from University of Oslo and The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" LTG-BERT configuration """
+
+
 from transformers.configuration_utils import PretrainedConfig
 
 
-class LTGBertConfig(PretrainedConfig):
-    """Configuration class to store the configuration of a `LTGBertModel`.
+LTG_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+    "bnc-bert-span": "https://huggingface.co/ltg/bnc-bert-span",
+    "bnc-bert-span-2x": "https://huggingface.co/ltg/bnc-bert-span-2x",
+    "bnc-bert-span-0.5x": "https://huggingface.co/ltg/bnc-bert-span-0.5x",
+    "bnc-bert-span-0.25x": "https://huggingface.co/ltg/bnc-bert-span-0.25x",
+    "bnc-bert-span-order": "https://huggingface.co/ltg/bnc-bert-span-order",
+    "bnc-bert-span-document": "https://huggingface.co/ltg/bnc-bert-span-document",
+    "bnc-bert-span-word": "https://huggingface.co/ltg/bnc-bert-span-word",
+    "bnc-bert-span-subword": "https://huggingface.co/ltg/bnc-bert-span-subword",
+
+    "norbert3-xs": "https://huggingface.co/ltg/norbert3-xs/config.json",
+    "norbert3-small": "https://huggingface.co/ltg/norbert3-small/config.json",
+    "norbert3-base": "https://huggingface.co/ltg/norbert3-base/config.json",
+    "norbert3-large": "https://huggingface.co/ltg/norbert3-large/config.json",
+
+    "norbert3-oversampled-base": "https://huggingface.co/ltg/norbert3-oversampled-base/config.json",
+    "norbert3-ncc-base": "https://huggingface.co/ltg/norbert3-ncc-base/config.json",
+    "norbert3-nak-base": "https://huggingface.co/ltg/norbert3-nak-base/config.json",
+    "norbert3-nb-base": "https://huggingface.co/ltg/norbert3-nb-base/config.json",
+    "norbert3-wiki-base": "https://huggingface.co/ltg/norbert3-wiki-base/config.json",
+    "norbert3-c4-base": "https://huggingface.co/ltg/norbert3-c4-base/config.json"
+}
+
+
+class LtgBertConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`LtgBertModel`]. It is used to
+    instantiate an LTG-BERT model according to the specified arguments, defining the model architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 16384):
+            Vocabulary size of the LTG-BERT model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`LtgBertModel`].
+        hidden_size (`int`, *optional*, defaults to 768):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_hidden_layers (`int`, *optional*, defaults to 12):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 12):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        intermediate_size (`int`, *optional*, defaults to 2048):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
+        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for the attention probabilities.
+        max_position_embeddings (`int`, *optional*, defaults to 512):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        layer_norm_eps (`float`, *optional*, defaults to 1e-7):
+            The epsilon used by the layer normalization layers.
+        classifier_dropout (`float`, *optional*):
+            The dropout ratio for the classification head.
     """
+    model_type = "bert"
     def __init__(
         self,
-        vocab_size=50000,
+        vocab_size=16384,
         attention_probs_dropout_prob=0.1,
         hidden_dropout_prob=0.1,
         hidden_size=768,
@@ -16,10 +86,12 @@ class LTGBertConfig(PretrainedConfig):
         num_attention_heads=12,
         num_hidden_layers=12,
         layer_norm_eps=1.0e-7,
+        pad_token_id=4,
         output_all_encoded_layers=True,
+        classifier_dropout=None,
         **kwargs,
     ):
-        super().__init__(**kwargs)
+        super().__init__(pad_token_id=pad_token_id, **kwargs)
 
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
@@ -32,3 +104,4 @@ class LTGBertConfig(PretrainedConfig):
         self.output_all_encoded_layers = output_all_encoded_layers
        self.position_bucket_size = position_bucket_size
        self.layer_norm_eps = layer_norm_eps
+        self.classifier_dropout = classifier_dropout
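
For reference, a minimal usage sketch of the updated configuration class. The direct import path and the `ltg/norbert3-base` checkpoint name are illustrative assumptions, not part of this commit; since the class is shipped as custom code inside the model repository, loading it through transformers requires trust_remote_code=True.

from transformers import AutoConfig

from configuration_ltgbert import LtgBertConfig  # assumes this file is on the import path

# Direct instantiation picks up the defaults introduced in this commit
# (vocab_size=16384, pad_token_id=4) plus the new classifier_dropout argument.
config = LtgBertConfig(classifier_dropout=0.2)
assert config.vocab_size == 16384
assert config.pad_token_id == 4
assert config.classifier_dropout == 0.2

# Loading a published checkpoint from the Hub (checkpoint name assumed for illustration);
# trust_remote_code is required because LtgBertConfig lives in the model repository.
config = AutoConfig.from_pretrained("ltg/norbert3-base", trust_remote_code=True)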