Sifal committed on
Commit
43433dd
·
verified ·
1 Parent(s): 616bbe5

Create configuration_bert.py

Browse files
Files changed (1) hide show
  1. configuration_bert.py +24 -0
configuration_bert.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022 MosaicML Examples authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from transformers import BertConfig as TransformersBertConfig
5
+
6
+
7
class BertConfig(TransformersBertConfig):

    def __init__(
        self,
        alibi_starting_size: int = 512,
        attention_probs_dropout_prob: float = 0.0,
        **kwargs,
    ):
        """Configuration for MosaicBert models.

        Thin extension of the Hugging Face ``BertConfig`` that adds one
        ALiBi-related knob and flips the attention-dropout default to 0.0.

        Args:
            alibi_starting_size (int): Size of the ALiBi tensor allocated
                when the model is initialized. Most users can leave this at
                the default. Defaults to 512.
            attention_probs_dropout_prob (float): Attention dropout
                probability. Mosaic BERT turns this off by default
                (otherwise Flash Attention would be disabled by default).
                Defaults to 0.0.
        """
        # Forward the dropout override explicitly; everything else flows
        # through to the transformers BertConfig untouched.
        super().__init__(
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            **kwargs,
        )
        # Recorded after super().__init__ so the parent cannot clobber it.
        self.alibi_starting_size = alibi_starting_size