Kalaphant committed on
Commit
7e69917
1 Parent(s): 1268e8d

Update config.json

Files changed (1)
  1. config.json +28 -26
config.json CHANGED
@@ -1,28 +1,30 @@
 {
-  "model_type": "bert", // Specify the type of model (e.g., bert, gpt, etc.)
-  "vocabulary_size": 30522, // Vocabulary size of the model
-  "hidden_size": 768, // Size of the hidden layers
-  "num_attention_heads": 12, // Number of attention heads in the model
-  "num_hidden_layers": 12, // Number of hidden layers in the model
-  "intermediate_size": 3072, // Size of the intermediate layers
-  "activation_function": "gelu", // Activation function used in the model
-  "initializer_range": 0.02, // Standard deviation of the truncated_normal_initializer
-  "layer_norm_eps": 1e-12, // Epsilon value for layer normalization
-  "max_position_embeddings": 512, // Maximum length of sequences
-  "tokenizer_type": "WordPiece", // Type of tokenizer used
-  "special_tokens": {
-    "pad_token": "[PAD]", // Padding token
-    "unk_token": "[UNK]", // Unknown token
-    "cls_token": "[CLS]", // Classification token
-    "sep_token": "[SEP]", // Separator token
-    "mask_token": "[MASK]" // Masking token
-  },
-  "dropout_rate": 0.1, // Dropout rate for regularization
-  "learning_rate": 0.00005, // Learning rate for training
-  "optimizer": "adamw", // Optimizer used during training
-  "num_labels": 2, // Number of labels for classification tasks
-  "train_batch_size": 16, // Batch size for training
-  "eval_batch_size": 32, // Batch size for evaluation
-  "epochs": 3, // Number of epochs for training
-  "early_stopping_patience": 3 // Patience for early stopping
+  "model_type": "gpt2",
+  "vocab_size": 50257,
+  "n_positions": 1024,
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_layer": 12,
+  "n_head": 12,
+  "n_inner": null,
+  "activation_function": "gelu_new",
+  "resid_pdrop": 0.1,
+  "embd_pdrop": 0.1,
+  "attn_pdrop": 0.1,
+  "layer_norm_epsilon": 1e-5,
+  "initializer_range": 0.02,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "summary_activation": null,
+  "summary_proj_to_labels": true,
+  "summary_first_dropout": 0.1,
+  "scale_attn_weights": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "tokenizer_class": "GPT2Tokenizer",
+  "pad_token_id": 50256,
+  "transformers_version": "4.0.0"
 }
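
Note on the change: the removed file used // comments, which are not legal JSON, so the previous revision of config.json could not be parsed at all; the added file is a standard GPT-2 config that loads cleanly. A minimal sketch of checking this, assuming the new file is saved locally as config.json and the transformers library is installed:

# Sketch only (not part of the commit): verify the new config.json is valid
# JSON and load it with transformers. The local path "config.json" and an
# installed `transformers` package are assumptions.
import json

from transformers import GPT2Config

# The previous revision's // comments would make json.load raise a
# JSONDecodeError; the new revision parses cleanly.
with open("config.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "gpt2"
assert (cfg["n_embd"], cfg["n_layer"], cfg["n_head"]) == (768, 12, 12)

# The same file maps directly onto a GPT2Config object.
config = GPT2Config.from_json_file("config.json")
print(config.vocab_size)  # 50257

Setting "pad_token_id" to 50256 is the usual GPT-2 convention: the model has no dedicated padding token, so the end-of-text token (id 50256) is reused for both bos_token_id, eos_token_id, and padding.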