Initial commit

Files changed (5) hide show

config.json +23 -0
merges.txt +0 -0
parameters.json +51 -0
pytorch_model.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "output_hidden_states": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.3.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 40000
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

parameters.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+    "force": false,
+    "dump_path": "serialization_dir/distilrobbert-v2-mini-unshuffled",
+    "data_file": "data/oscar_dutch/nl_unshuffled_mini-data.pdelobelle-robbert-v2-dutch-base.pickle",
+    "student_type": "roberta",
+    "student_config": "training_configs/distilrobbert-base.json",
+    "student_pretrained_weights": null,
+    "teacher_type": "roberta",
+    "teacher_name": "pdelobelle/robbert-v2-dutch-base",
+    "temperature": 2.0,
+    "alpha_ce": 5.0,
+    "alpha_mlm": 2.0,
+    "alpha_clm": 0.0,
+    "alpha_mse": 0.0,
+    "alpha_cos": 1.0,
+    "mlm": true,
+    "mlm_mask_prop": 0.15,
+    "word_mask": 0.8,
+    "word_keep": 0.1,
+    "word_rand": 0.1,
+    "mlm_smoothing": 0.7,
+    "token_counts": "data/oscar_dutch/nl_unshuffled_mini-counts-40k.pdelobelle-robbert-v2-dutch-base.pickle",
+    "restrict_ce_to_mask": false,
+    "freeze_pos_embs": true,
+    "freeze_token_type_embds": false,
+    "n_epoch": 3,
+    "batch_size": 5,
+    "group_by_size": true,
+    "gradient_accumulation_steps": 128,
+    "warmup_prop": 0.05,
+    "weight_decay": 0.01,
+    "learning_rate": 0.0005,
+    "adam_epsilon": 1e-06,
+    "max_grad_norm": 5.0,
+    "initializer_range": 0.02,
+    "fp16": false,
+    "fp16_opt_level": "O1",
+    "gpus": 1,
+    "local_rank": 0,
+    "seed": 56,
+    "log_interval": 500,
+    "checkpoint_interval": 4000,
+    "n_nodes": 1,
+    "node_id": 0,
+    "global_rank": 0,
+    "world_size": 1,
+    "n_gpu_per_node": 1,
+    "multi_gpu": false,
+    "is_master": true,
+    "multi_node": false
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bbf01b4ede29900c5cec741b081f7462565cffcbbc37c18d8989dbc931c0dc
+size 297155506

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff