Kalaphant committed on
Commit
7e69917
1 Parent(s): 1268e8d

Update config.json

Files changed (1)
  1. config.json +28 -26
config.json CHANGED
@@ -1,28 +1,30 @@
 {
-  "model_type": "bert", // Specify the type of model (e.g., bert, gpt, etc.)
-  "vocabulary_size": 30522, // Vocabulary size of the model
-  "hidden_size": 768, // Size of the hidden layers
-  "num_attention_heads": 12, // Number of attention heads in the model
-  "num_hidden_layers": 12, // Number of hidden layers in the model
-  "intermediate_size": 3072, // Size of the intermediate layers
-  "activation_function": "gelu", // Activation function used in the model
-  "initializer_range": 0.02, // Standard deviation of the truncated_normal_initializer
-  "layer_norm_eps": 1e-12, // Epsilon value for layer normalization
-  "max_position_embeddings": 512, // Maximum length of sequences
-  "tokenizer_type": "WordPiece", // Type of tokenizer used
-  "special_tokens": {
-    "pad_token": "[PAD]", // Padding token
-    "unk_token": "[UNK]", // Unknown token
-    "cls_token": "[CLS]", // Classification token
-    "sep_token": "[SEP]", // Separator token
-    "mask_token": "[MASK]" // Masking token
-  },
-  "dropout_rate": 0.1, // Dropout rate for regularization
-  "learning_rate": 0.00005, // Learning rate for training
-  "optimizer": "adamw", // Optimizer used during training
-  "num_labels": 2, // Number of labels for classification tasks
-  "train_batch_size": 16, // Batch size for training
-  "eval_batch_size": 32, // Batch size for evaluation
-  "epochs": 3, // Number of epochs for training
-  "early_stopping_patience": 3 // Patience for early stopping
+  "model_type": "gpt2",
+  "vocab_size": 50257,
+  "n_positions": 1024,
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_layer": 12,
+  "n_head": 12,
+  "n_inner": null,
+  "activation_function": "gelu_new",
+  "resid_pdrop": 0.1,
+  "embd_pdrop": 0.1,
+  "attn_pdrop": 0.1,
+  "layer_norm_epsilon": 1e-5,
+  "initializer_range": 0.02,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "summary_activation": null,
+  "summary_proj_to_labels": true,
+  "summary_first_dropout": 0.1,
+  "scale_attn_weights": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "tokenizer_class": "GPT2Tokenizer",
+  "pad_token_id": 50256,
+  "transformers_version": "4.0.0"
 }
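
Note on the change: the removed file used // comments, which are not legal JSON, so the previous revision of config.json could not be parsed at all; the added file is a standard GPT-2 config that loads cleanly. A minimal sketch of checking this, assuming the new file is saved locally as config.json and the transformers library is installed:

# Sketch only (not part of the commit): verify the new config.json is valid
# JSON and load it with transformers. The local path "config.json" and an
# installed `transformers` package are assumptions.
import json

from transformers import GPT2Config

# The previous revision's // comments would make json.load raise a
# JSONDecodeError; the new revision parses cleanly.
with open("config.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "gpt2"
assert (cfg["n_embd"], cfg["n_layer"], cfg["n_head"]) == (768, 12, 12)

# The same file maps directly onto a GPT2Config object.
config = GPT2Config.from_json_file("config.json")
print(config.vocab_size)  # 50257

Setting "pad_token_id" to 50256 is the usual GPT-2 convention: the model has no dedicated padding token, so the end-of-text token (id 50256) is reused for both bos_token_id, eos_token_id, and padding.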