boris committed on
Commit 69bcbeb
1 Parent(s): a7e5050

feat: placeholders for more config

src/dalle_mini/model/configuration.py CHANGED
@@ -65,6 +65,8 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
         tau_init=0.05, # used only in cosine attention (Swin v2)
         use_deepnet_scaling=False, # used in Deepnet
         use_glu=False, # "GLU Variants Improve Transformer"
+        use_alibi=False, # from "Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation"
+        sink_iters=1, # used in SinkFormers
         # parameters that should not be necessary but could affect results
         force_ln_scale=True, # force scale in layernorm even when followed by dense layers
         force_final_ln_encoder=False, # force layer normalization in encoder final layer even when followed by dense layers
@@ -88,11 +90,14 @@ class DalleBartConfig(PretrainedFromWandbMixin, PretrainedConfig):
         ], "ln_positions must be 'normformer', 'swinv2' or 'deepnet'"
         if ln_positions == "deepnet":
             ln_positions = "postln"
+        assert use_alibi is False, "use_alibi is not supported yet"
         self.ln_positions = ln_positions
         self.use_cosine_attention = use_cosine_attention
         self.tau_init = tau_init
         self.use_deepnet_scaling = use_deepnet_scaling
         self.use_glu = use_glu
+        self.use_alibi = use_alibi
+        self.sink_iters = sink_iters
         self.force_ln_scale = force_ln_scale
         self.force_final_ln_encoder = force_final_ln_encoder
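Since the new options are placeholders, the only behavioral change is the guard on use_alibi. A quick check of the commit's effect, assuming the remaining DalleBartConfig constructor arguments keep their defaults:

    from dalle_mini.model.configuration import DalleBartConfig

    # Both new flags default to off, so existing configs behave as before.
    config = DalleBartConfig()
    assert config.use_alibi is False
    assert config.sink_iters == 1

    # Enabling the placeholder trips the assertion added in this commit.
    try:
        DalleBartConfig(use_alibi=True)
    except AssertionError as err:
        print(err)  # use_alibi is not supported yet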
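For context on what use_alibi would eventually enable: ALiBi replaces learned position embeddings with head-specific linear penalties added to the attention scores. A minimal sketch of those biases, following the cited paper rather than any dalle-mini code (the helper name and shapes here are illustrative):

    import jax.numpy as jnp

    def alibi_bias(num_heads, seq_len):
        # Head-specific slopes: the geometric sequence 2^(-8/n), 2^(-16/n), ...
        # from the paper, assuming num_heads is a power of two.
        slopes = 2.0 ** (-8.0 * jnp.arange(1, num_heads + 1) / num_heads)
        # Relative distance of each key from each query; adding
        # slope * distance to the scores penalizes distant keys.
        positions = jnp.arange(seq_len)
        distances = positions[None, :] - positions[:, None]  # (query, key)
        return slopes[:, None, None] * distances[None, :, :]  # (head, q, k)

The bias is simply added to the raw scores before the softmax (and before any causal mask), which is what lets the model extrapolate to sequence lengths longer than it was trained on.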
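Similarly, sink_iters refers to the number of Sinkhorn normalization steps in "Sinkformers: Transformers with Doubly Stochastic Attention", where a single iteration recovers the ordinary softmax and further iterations alternate row and column normalization. A log-space sketch under that reading (again illustrative, not the repo's code):

    import jax.numpy as jnp
    from jax.scipy.special import logsumexp

    def sinkhorn_attention(scores, sink_iters=1):
        # sink_iters=1 is a plain row softmax; more iterations push the
        # attention matrix toward doubly stochastic.
        log_p = scores
        for it in range(sink_iters):
            # Normalize rows (the standard softmax step).
            log_p = log_p - logsumexp(log_p, axis=-1, keepdims=True)
            if it < sink_iters - 1:
                # Normalize columns before the next row step.
                log_p = log_p - logsumexp(log_p, axis=-2, keepdims=True)
        return jnp.exp(log_p)

The default of sink_iters=1 therefore keeps standard attention, matching the commit's "placeholder" framing.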