MuhammedSaeed committed
Commit 3da8607
Parent(s): e175f8b
Upload 4 files
Knowledge distillation of a BERT language model for Arabic: config and model bin files. The teacher model used to train the student is `asafaya/bert-large-arabic`. A usage sketch follows the file list below.
- config.json +21 -0
- git_log.json +5 -0
- parameters.json +51 -0
- pytorch_model.bin +3 -0
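For readers who want to try the distilled student, a minimal loading sketch follows. The repo id is a placeholder (this commit page does not show the hosting repo name), and since no tokenizer ships in this commit, the teacher's tokenizer is assumed to be compatible (config.json below declares the same 32000-token vocabulary):

```python
from transformers import AutoTokenizer, DistilBertModel

# Placeholder repo id: the hosting repo name is not shown in this commit.
model = DistilBertModel.from_pretrained("MuhammedSaeed/<repo-name>")

# No tokenizer is part of this commit; the teacher's tokenizer is assumed
# to be compatible (config.json declares the same vocab_size of 32000).
tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-large-arabic")

inputs = tokenizer("مرحبا بالعالم", return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, seq_len, 1024)
```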
config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "_name_or_path": "/local/musaeed/UofkDistill/model/checkpoint.pth",
+  "activation": "gelu",
+  "attention_dropout": 0.1,
+  "dim": 1024,
+  "dropout": 0.1,
+  "hidden_dim": 4096,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 16,
+  "n_layers": 6,
+  "output_hidden_states": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
+  "transformers_version": "4.24.0",
+  "vocab_size": 32000
+}
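The config above describes a DistilBERT student that keeps the BERT-large teacher's width (hidden size 1024, 16 attention heads, 4096-dim FFN) but only 6 of its 24 layers. As a sanity check, here is a sketch that rebuilds the same architecture from these values (a fresh, randomly initialized model, not the uploaded checkpoint):

```python
from transformers import DistilBertConfig, DistilBertModel

# Values copied from the config.json above: a 6-layer student that keeps
# the teacher's BERT-large width (dim=1024, 16 heads, 4096 FFN).
config = DistilBertConfig(
    vocab_size=32000,
    dim=1024,
    hidden_dim=4096,
    n_heads=16,
    n_layers=6,
    sinusoidal_pos_embds=True,
    output_hidden_states=True,
)
model = DistilBertModel(config)  # same shapes as the uploaded checkpoint
print(sum(p.numel() for p in model.parameters()))  # ~110M parameters
```

At roughly 110M parameters this is consistent with the ~440 MB fp32 `pytorch_model.bin` uploaded below.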
git_log.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "repo_id": "<git.Repo \"/local/musaeed/UofkDistill/transformers/.git\">",
+  "repo_sha": "699e90437f984d69ad3c9b891dd2e9d0fc2cffe4",
+  "repo_branch": "main"
+}
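`git_log.json` records which transformers checkout and commit the distillation script ran from. A sketch of how such a record is typically produced with GitPython (an assumption; the exact helper used by the training code is not shown in this commit):

```python
import json

import git  # GitPython

# Record which checkout and commit the training run used, mirroring the
# structure of git_log.json above.
repo = git.Repo(search_parent_directories=True)
log = {
    "repo_id": str(repo),
    "repo_sha": str(repo.head.object.hexsha),
    "repo_branch": str(repo.active_branch),
}
with open("git_log.json", "w") as f:
    json.dump(log, f, indent=4)
```

Note that `str(repo)` renders as `<git.Repo "...">`, which is exactly the form seen in `repo_id` above.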
parameters.json
ADDED
@@ -0,0 +1,51 @@
+{
+  "force": true,
+  "dump_path": "/local/musaeed/UofkDistill/Dumps/",
+  "data_file": "/local/musaeed/UofkDistill/transformers/examples/research_projects/distillation/The_data/merged_data_binarized.pickle",
+  "student_type": "distilbert",
+  "student_config": "/local/musaeed/UofkDistill/transformers_/transformers/examples/research_projects/distillation/training_configs/distilbert-base-uncased.json",
+  "student_pretrained_weights": "/local/musaeed/UofkDistill/model/checkpoint.pth",
+  "teacher_type": "bert",
+  "teacher_name": "asafaya/bert-large-arabic",
+  "temperature": 2.0,
+  "alpha_ce": 5.0,
+  "alpha_mlm": 2.0,
+  "alpha_clm": 0.0,
+  "alpha_mse": 0.0,
+  "alpha_cos": 1.0,
+  "mlm": true,
+  "mlm_mask_prop": 0.15,
+  "word_mask": 0.8,
+  "word_keep": 0.1,
+  "word_rand": 0.1,
+  "mlm_smoothing": 0.7,
+  "token_counts": "/local/musaeed/UofkDistill/transformers/examples/research_projects/distillation/The_data/merged_token_count.pickle",
+  "restrict_ce_to_mask": false,
+  "freeze_pos_embs": true,
+  "freeze_token_type_embds": false,
+  "n_epoch": 3,
+  "batch_size": 16,
+  "group_by_size": true,
+  "gradient_accumulation_steps": 50,
+  "warmup_prop": 0.05,
+  "weight_decay": 0.0,
+  "learning_rate": 3e-05,
+  "adam_epsilon": 1e-06,
+  "max_grad_norm": 5.0,
+  "initializer_range": 0.02,
+  "fp16": false,
+  "fp16_opt_level": "O1",
+  "n_gpu": 4,
+  "local_rank": 0,
+  "seed": 56,
+  "log_interval": 500,
+  "checkpoint_interval": 10000,
+  "world_size": 4,
+  "n_gpu_per_node": 4,
+  "global_rank": 0,
+  "n_nodes": 1,
+  "node_id": 0,
+  "multi_gpu": true,
+  "is_master": true,
+  "multi_node": false
+}
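The `temperature` and `alpha_*` weights above define the distillation objective: a weighted sum of temperature-scaled KL divergence against the teacher's logits (`alpha_ce=5.0`), masked-LM cross-entropy against the true tokens (`alpha_mlm=2.0`), and cosine alignment of student and teacher hidden states (`alpha_cos=1.0`), with the CLM and MSE terms switched off. An illustrative sketch of that combination (not the exact `Distiller` code from `examples/research_projects/distillation`):

```python
import torch
import torch.nn.functional as F

def distillation_loss(s_logits, t_logits, labels, s_hidden, t_hidden,
                      temperature=2.0, alpha_ce=5.0, alpha_mlm=2.0, alpha_cos=1.0):
    """Weighted sum of the three active loss terms in parameters.json."""
    # Temperature-scaled KL divergence between student and teacher distributions.
    loss_ce = F.kl_div(
        F.log_softmax(s_logits / temperature, dim=-1),
        F.softmax(t_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature ** 2
    # Standard masked-LM cross-entropy against the true tokens (-100 = ignored).
    loss_mlm = F.cross_entropy(s_logits.view(-1, s_logits.size(-1)),
                               labels.view(-1), ignore_index=-100)
    # Cosine alignment of hidden states (teacher and student share dim=1024).
    target = s_hidden.new_ones(s_hidden.size(0) * s_hidden.size(1))
    loss_cos = F.cosine_embedding_loss(
        s_hidden.view(-1, s_hidden.size(-1)),
        t_hidden.view(-1, t_hidden.size(-1)),
        target,
    )
    return alpha_ce * loss_ce + alpha_mlm * loss_mlm + alpha_cos * loss_cos

if __name__ == "__main__":
    B, L, V, H = 2, 8, 32000, 1024
    s_logits, t_logits = torch.randn(B, L, V), torch.randn(B, L, V)
    labels = torch.randint(0, V, (B, L))
    s_hidden, t_hidden = torch.randn(B, L, H), torch.randn(B, L, H)
    print(distillation_loss(s_logits, t_logits, labels, s_hidden, t_hidden))
```

Note also the effective batch size: 16 sequences per GPU × 50 gradient-accumulation steps × a world size of 4 gives 3200 sequences per optimizer step.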
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b22e23014de7252d3fa488bcb494ee2bce4221d27df6f2b87203d0d19b2c7d88
+size 439862319
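This is a Git LFS pointer, not the weights themselves: the ~440 MB checkpoint is stored in LFS and identified by its SHA-256. A small sketch for verifying a downloaded `pytorch_model.bin` against the pointer:

```python
import hashlib
import os

# Values from the LFS pointer above.
EXPECTED_OID = "b22e23014de7252d3fa488bcb494ee2bce4221d27df6f2b87203d0d19b2c7d88"
EXPECTED_SIZE = 439862319

def verify(path="pytorch_model.bin"):
    assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
    print("pytorch_model.bin matches the LFS pointer")

verify()
```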