jargon-NACHOS / jargon_configuration.py
a-mannion's picture
Upload 8 files
acc09a7 verified
from transformers.models.roberta.modeling_roberta import RobertaConfig
class JargonConfig(RobertaConfig):
    """Configuration object registered under the model type ``"jargon"``.

    Subclasses ``RobertaConfig``. Each named constructor argument is stored,
    unmodified, as an instance attribute with the same name. Keyword
    arguments that are not named in the signature are passed through to
    ``RobertaConfig.__init__``.

    The named arguments are applied *after* the parent initializer has run,
    so any attribute the parent created under the same name is overwritten
    by the value given here.
    """

    model_type = "jargon"

    def __init__(
        self,
        compress_layer=1,
        shared_layer_kv_compressed=1,
        shared_kv_compressed=0,
        max_positions=512,
        max_position_embeddings=512,
        compressed=4,
        vocab_size=30522,
        freeze_compress=0,
        embed_dim=768,
        num_heads=16,
        dim_feedforward=4096,
        dropout=0.1,
        activation="relu",
        layer_norm_eps=1e-05,
        self_attention=True,
        encoder_decoder_attention=False,
        bias=True,
        q_noise=0,
        qn_block_size=8,
        add_bias_kv=False,
        add_zero_attn=False,
        num_layers=12,
        untie_weights_roberta=False,
        layernorm_embedding=False,
        encoder_normalize_before=False,
        encoder_embed_dim=768,
        encoder_attention_heads=12,
        quant_noise_pq=0.0,
        quant_noise_pq_block_size=8,
        quant_noise_scalar=0,
        encoder_ffn_embed_dim=4096,
        add_pooling_layer=False,
        intermediate_size=4096,
        intermediate_act_fn="relu",
        hidden_act="relu",
        output_hidden_states=False,
        position_embedding_type="learned",
        **kwargs
    ):
        """Initialize the parent config, then record every named argument.

        Only ``**kwargs`` reaches ``RobertaConfig.__init__``; none of the
        explicitly named arguments are forwarded to it.
        """
        # Parent goes first so the values below take precedence over
        # whatever it assigned.
        super().__init__(**kwargs)

        # Attribute name -> value, listed in the order the attributes are
        # written onto the instance.
        overrides = {
            "add_pooling_layer": add_pooling_layer,
            "compress_layer": compress_layer,
            "shared_layer_kv_compressed": shared_layer_kv_compressed,
            "shared_kv_compressed": shared_kv_compressed,
            "max_positions": max_positions,
            "max_position_embeddings": max_position_embeddings,
            "compressed": compressed,
            "freeze_compress": freeze_compress,
            "embed_dim": embed_dim,
            "num_heads": num_heads,
            "dim_feedforward": dim_feedforward,
            "dropout": dropout,
            "activation": activation,
            "layer_norm_eps": layer_norm_eps,
            "self_attention": self_attention,
            "encoder_decoder_attention": encoder_decoder_attention,
            "bias": bias,
            "q_noise": q_noise,
            "qn_block_size": qn_block_size,
            "add_bias_kv": add_bias_kv,
            "add_zero_attn": add_zero_attn,
            "num_layers": num_layers,
            "untie_weights_roberta": untie_weights_roberta,
            "layernorm_embedding": layernorm_embedding,
            "encoder_embed_dim": encoder_embed_dim,
            "encoder_attention_heads": encoder_attention_heads,
            "quant_noise_pq": quant_noise_pq,
            "quant_noise_pq_block_size": quant_noise_pq_block_size,
            "quant_noise_scalar": quant_noise_scalar,
            "encoder_normalize_before": encoder_normalize_before,
            "encoder_ffn_embed_dim": encoder_ffn_embed_dim,
            "vocab_size": vocab_size,
            "intermediate_size": intermediate_size,
            "intermediate_act_fn": intermediate_act_fn,
            "output_hidden_states": output_hidden_states,
            "hidden_act": hidden_act,
            "position_embedding_type": position_embedding_type,
        }
        for attr_name, attr_value in overrides.items():
            setattr(self, attr_name, attr_value)