Update configuration_opt.py
configuration_opt.py (+2 -1) CHANGED
@@ -56,6 +56,7 @@ class OPTConfig(PretrainedConfig):
         do_layer_norm_before (`bool`, *optional*, defaults to `True`):
 
             Whether to perform layer normalization before the attention block.
+
         word_embed_proj_dim (`int`, *optional*):
             `word_embed_proj_dim` can be set to down-project word embeddings, *e.g.* `opt-350m`. Defaults to
             `hidden_size`.
@@ -115,7 +116,7 @@ class OPTConfig(PretrainedConfig):
         eos_token_id=2,
         enable_bias=True,
         layer_norm_elementwise_affine=True,
-        attn_implementation='eager',
+        #attn_implementation='eager',
         **kwargs,
     ):
         super().__init__(
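
In effect, the commit comments out the hard-coded attn_implementation='eager' parameter in OPTConfig.__init__ and adds a blank line to the docstring. With the explicit parameter removed, the attention backend would instead flow through **kwargs into PretrainedConfig, or be chosen at load time. A minimal sketch of both paths, assuming a recent transformers release; "facebook/opt-350m" is only an example checkpoint id:

from transformers import AutoModelForCausalLM, OPTConfig

# Path 1: the backend can still reach the config through **kwargs,
# which PretrainedConfig consumes as `attn_implementation`.
config = OPTConfig(attn_implementation="eager")

# Path 2: request a backend when loading pretrained weights.
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    attn_implementation="eager",  # or "sdpa" / "flash_attention_2" where available
)

# The resolved backend is stored on the config (private attribute in
# current transformers releases).
print(model.config._attn_implementation)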