lsanochkin committed on
Commit df04b7a
1 Parent(s): a5cb282

initial commit

Files changed (6)
  1. config.json +29 -0
  2. git_log.json +5 -0
  3. parameters.json +51 -0
  4. pytorch_model.bin +3 -0
  5. tokenizer.json +0 -0
  6. vocab.txt +0 -0
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "serialization_dir/distilelectra.pth",
+   "architectures": [
+     "ElectraForPreTraining"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "embedding_size": 768,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "electra",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 6,
+   "output_past": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "summary_activation": "gelu",
+   "summary_last_dropout": 0.1,
+   "summary_type": "first",
+   "summary_use_proj": true,
+   "transformers_version": "4.11.3",
+   "type_vocab_size": 2,
+   "vocab_size": 30522
+ }
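
This config describes a 6-layer ELECTRA discriminator that keeps the teacher's 768-wide hidden size and 12 attention heads, i.e. half the depth of google/electra-base-discriminator. A minimal loading sketch with the Transformers library; the local path ./distilelectra is a hypothetical clone location, not something named in this commit:

from transformers import ElectraConfig, ElectraForPreTraining, ElectraTokenizerFast

# Assumes the repository has been cloned locally to ./distilelectra
# (hypothetical path; substitute the actual location or hub id).
config = ElectraConfig.from_pretrained("./distilelectra")
model = ElectraForPreTraining.from_pretrained("./distilelectra")
tokenizer = ElectraTokenizerFast.from_pretrained("./distilelectra")

print(config.num_hidden_layers)  # 6, half the depth of electra-base
print(config.hidden_size)        # 768, the teacher width is kept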
git_log.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "repo_id": "<git.repo.base.Repo '/home/jovyan/distil_tmp/distillation/.git'>",
+   "repo_sha": "de6dcaeed8e16839757d0c5a3a5f7e60f0b454e6",
+   "repo_branch": "master"
+ }
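
These fields record the training repo's provenance (working tree, commit SHA, branch) at distillation time. A sketch of how such metadata can be captured with GitPython, in the spirit of the git_log helper used by the transformers distillation example scripts; the output filename and indentation are assumptions:

import json

import git  # pip install GitPython

# Capture the current repo's provenance, mirroring the three fields above.
repo = git.Repo(search_parent_directories=True)
info = {
    "repo_id": str(repo),
    "repo_sha": str(repo.head.object.hexsha),
    "repo_branch": str(repo.active_branch),
}
with open("git_log.json", "w") as f:  # filename assumed from this commit
    json.dump(info, f, indent=4)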
parameters.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "force": true,
+   "dump_path": "./serialization_dir/distilbert_mse",
+   "data_file": "data/binarized_text.electra.pickle",
+   "student_type": "electra",
+   "student_config": "training_configs/distilelectra.json",
+   "student_pretrained_weights": "serialization_dir/distilelectra.pth",
+   "teacher_type": "electra",
+   "teacher_name": "google/electra-base-discriminator",
+   "temperature": 2.0,
+   "alpha_ce": 5.0,
+   "alpha_mlm": 2.0,
+   "alpha_clm": 0.0,
+   "alpha_mse": 1.0,
+   "alpha_cos": 1.0,
+   "mlm": true,
+   "mlm_mask_prop": 0.15,
+   "word_mask": 0.8,
+   "word_keep": 0.1,
+   "word_rand": 0.1,
+   "mlm_smoothing": 0.7,
+   "token_counts": "data/token_counts.electra.pickle",
+   "restrict_ce_to_mask": false,
+   "freeze_pos_embs": true,
+   "freeze_token_type_embds": false,
+   "n_epoch": 50,
+   "batch_size": 5,
+   "group_by_size": true,
+   "gradient_accumulation_steps": 50,
+   "warmup_prop": 0.05,
+   "weight_decay": 0.0,
+   "learning_rate": 0.0005,
+   "adam_epsilon": 1e-06,
+   "max_grad_norm": 5.0,
+   "initializer_range": 0.02,
+   "fp16": false,
+   "fp16_opt_level": "O1",
+   "n_gpu": 4,
+   "local_rank": 0,
+   "seed": 56,
+   "log_interval": 500,
+   "checkpoint_interval": 4000,
+   "world_size": 4,
+   "n_gpu_per_node": 4,
+   "global_rank": 0,
+   "n_nodes": 1,
+   "node_id": 0,
+   "multi_gpu": true,
+   "is_master": true,
+   "multi_node": false
+ }
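
The alpha_* keys weight the component distillation losses: soft-target cross entropy at temperature 2.0, hard MLM loss, logit MSE, and hidden-state cosine alignment (alpha_clm is 0.0 since this is an MLM-style student). With batch_size 5, gradient_accumulation_steps 50, and world_size 4, the effective batch is 5 x 50 x 4 = 1000 sequences per update. A minimal sketch of how these weights could combine per step, assuming logits and hidden states already gathered at masked positions (tensor names and shapes are hypothetical, not taken from this repo):

import torch.nn.functional as F

TEMPERATURE = 2.0                # "temperature"
ALPHA_CE, ALPHA_MLM = 5.0, 2.0   # "alpha_ce", "alpha_mlm"
ALPHA_MSE, ALPHA_COS = 1.0, 1.0  # "alpha_mse", "alpha_cos"

def distillation_loss(s_logits, t_logits, labels, s_hidden, t_hidden):
    """s_logits/t_logits: [n_masked, vocab]; labels: [n_masked];
    s_hidden/t_hidden: [n_masked, hidden]."""
    # Soft-target loss: KL between temperature-scaled distributions,
    # rescaled by T^2 so gradient magnitudes stay comparable.
    loss_ce = F.kl_div(
        F.log_softmax(s_logits / TEMPERATURE, dim=-1),
        F.softmax(t_logits / TEMPERATURE, dim=-1),
        reduction="batchmean",
    ) * TEMPERATURE ** 2
    # Hard-target MLM loss against the true masked tokens.
    loss_mlm = F.cross_entropy(s_logits, labels)
    # Direct MSE between student and teacher logits.
    loss_mse = F.mse_loss(s_logits, t_logits)
    # Cosine alignment of last hidden states (target = +1 everywhere).
    ones = s_hidden.new_ones(s_hidden.size(0))
    loss_cos = F.cosine_embedding_loss(s_hidden, t_hidden, ones)
    return (ALPHA_CE * loss_ce + ALPHA_MLM * loss_mlm
            + ALPHA_MSE * loss_mse + ALPHA_COS * loss_cos)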
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d83dac7e383e19c6686bcf7294ad3477ce12176e95c697009fb7c2d0564ce0c
+ size 267998066
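
The weights file is stored via Git LFS, so the committed blob is only this three-line pointer; the actual binary (267998066 bytes, roughly 268 MB, consistent with a 6-layer ELECTRA-base-width student in fp32) is resolved on download. A sketch of fetching it with huggingface_hub; the repo id lsanochkin/distilelectra is an assumption inferred from the committer and file names, so substitute the real repository:

from huggingface_hub import hf_hub_download

# "lsanochkin/distilelectra" is an assumed repo id; replace with the
# actual repository before running.
weights_path = hf_hub_download(
    repo_id="lsanochkin/distilelectra",
    filename="pytorch_model.bin",
    revision="df04b7a",  # the commit shown at the top of this page
)
print(weights_path)  # local path to the resolved ~268 MB binary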
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff