ivanzhouyq committed
Commit 8de19fe
Parent: 35942d5

Update configurations

Files changed (2):
  1. backpack_config.py +18 -12
  2. config.json +82 -1
backpack_config.py CHANGED
@@ -2,16 +2,22 @@ from transformers import GPT2Config
 
 
 class BackpackGPT2Config(GPT2Config):
-    model_type = 'backpack-gpt2'
+    model_type = "backpack-gpt2"
 
-    def __init__(self,
-        vocab_size=50264,
-        num_senses=16,
-        sense_intermediate_scale=4,
-        n_positions=512,
-        scale_attn_by_inverse_layer_idx=True,
-        **kwargs,
-    ):
-        self.num_senses = num_senses
-        self.sense_intermediate_scale = sense_intermediate_scale
-        super().__init__(vocab_size=vocab_size, n_positions=n_positions, scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx, **kwargs)
+    def __init__(
+        self,
+        num_senses: int = 16,
+        sense_intermediate_scale: int = 4,
+        vocab_size: int = 50264,
+        n_positions: int = 512,
+        scale_attn_by_inverse_layer_idx: bool = True,
+        **kwargs,
+    ):
+        self.num_senses = num_senses
+        self.sense_intermediate_scale = sense_intermediate_scale
+        super().__init__(
+            vocab_size=vocab_size,
+            n_positions=n_positions,
+            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
+            **kwargs,
+        )
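
As a quick sanity check of the updated signature, a minimal sketch (assuming backpack_config.py is importable from the working directory and transformers is installed). The two Backpack-specific fields get their new defaults, and anything else still flows through **kwargs into GPT2Config:

    from backpack_config import BackpackGPT2Config

    # Defaults come from the updated __init__ signature above.
    config = BackpackGPT2Config()
    print(config.model_type)                      # backpack-gpt2
    print(config.num_senses)                      # 16
    print(config.sense_intermediate_scale)        # 4
    print(config.vocab_size, config.n_positions)  # 50264 512

    # Extra GPT-2 fields pass through **kwargs to GPT2Config.
    small = BackpackGPT2Config(n_layer=12, n_head=12, n_embd=768)
    print(small.n_embd)                           # 768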
config.json CHANGED
@@ -1 +1,82 @@
-{"return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": null, "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "bos_token_id": null, "pad_token_id": null, "eos_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.29.2", "vocab_size": 50264, "n_positions": 512, "n_layer": 36, "n_head": 20, "n_embd": 1280, "initializer_range": 0.02, "attn_pdrop": 0.0, "embd_pdrop": 0.0, "layer_norm_epsilon": 1e-05, "activation_function": "gelu_new", "scale_attn_by_inverse_layer_idx": true, "reorder_and_upcast_attn": false, "auto_map": {"AutoConfig": "backpack_config.BackpackGPT2Config", "AutoModelForCausalLM": "backpack_model.BackpackGPT2Model"}, "model_type": "backpack-gpt2"}
+{
+  "return_dict": true,
+  "output_hidden_states": false,
+  "output_attentions": false,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": true,
+  "is_encoder_decoder": false,
+  "is_decoder": false,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "chunk_size_feed_forward": 0,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
+  "architectures": null,
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "bos_token_id": null,
+  "pad_token_id": null,
+  "eos_token_id": null,
+  "sep_token_id": null,
+  "decoder_start_token_id": null,
+  "task_specific_params": null,
+  "problem_type": null,
+  "_name_or_path": "",
+  "transformers_version": "4.29.2",
+  "vocab_size": 50264,
+  "n_positions": 512,
+  "n_layer": 12,
+  "n_head": 12,
+  "n_embd": 768,
+  "initializer_range": 0.02,
+  "attn_pdrop": 0.0,
+  "embd_pdrop": 0.0,
+  "layer_norm_epsilon": 1e-05,
+  "activation_function": "gelu_new",
+  "scale_attn_by_inverse_layer_idx": true,
+  "reorder_and_upcast_attn": false,
+  "num_senses": 16,
+  "sense_intermediate_scale": 4,
+  "auto_map": {
+    "AutoConfig": "backpack_config.BackpackGPT2Config",
+    "AutoModelForCausalLM": "backpack_model.BackpackGPT2Model"
+  },
+  "model_type": "backpack-gpt2"
+}
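
Beyond pretty-printing, this diff shrinks the model dimensions (n_layer 36 -> 12, n_head 20 -> 12, n_embd 1280 -> 768) and adds the two Backpack-specific fields, num_senses and sense_intermediate_scale. Because auto_map points AutoConfig and AutoModelForCausalLM at the custom classes in this repo, loading from the Hub needs trust_remote_code=True. A minimal sketch; "your-org/backpack-gpt2" is a placeholder, not the actual repo id:

    from transformers import AutoConfig, AutoModelForCausalLM

    repo_id = "your-org/backpack-gpt2"  # placeholder; substitute the real Hub repo id

    # trust_remote_code=True lets transformers import the classes named in
    # auto_map: backpack_config.BackpackGPT2Config and
    # backpack_model.BackpackGPT2Model.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    print(config.num_senses, config.sense_intermediate_scale)  # 16 4

    model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)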