Aunsiels committed on
Commit
213757c
1 Parent(s): 6ebf578

Upload 10 files

Browse files
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"<SEP>": 28998, "<BOS>": 28996, "<PAD>": 28999, "<EOS>": 28997}
 
1
+ {"<BOS>": 28996, "<SEP>": 28998, "<PAD>": 28999, "<EOS>": 28997}
eval_results_mlm.txt CHANGED
@@ -1 +1 @@
1
- perplexity = 11.000638008243978
 
1
+ perplexity = 47.5140214816094
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1af4f0f3524fa8ffd0f6083132bed8a0309bba8f0e4104cfdc66e47101a6ca5
3
- size 1334625496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2fbf5dab4a15412f879cef823c7c4640ff99951eadbf6f65cfd3e3376e18a82
3
+ size 1334625493
train_results.txt CHANGED
@@ -1,11 +1,11 @@
1
- epoch = 5.0
2
- init_mem_cpu_alloc_delta = 1159303168
3
- init_mem_cpu_peaked_delta = 916664320
4
  init_mem_gpu_alloc_delta = 1335215616
5
  init_mem_gpu_peaked_delta = 0
6
- train_mem_cpu_alloc_delta = 29356032
7
- train_mem_cpu_peaked_delta = 0
8
  train_mem_gpu_alloc_delta = 4003456000
9
  train_mem_gpu_peaked_delta = 19106743296
10
- train_runtime = 393.5719
11
- train_samples_per_second = 0.978
 
1
+ epoch = 10.0
2
+ init_mem_cpu_alloc_delta = 1168576512
3
+ init_mem_cpu_peaked_delta = 1323925504
4
  init_mem_gpu_alloc_delta = 1335215616
5
  init_mem_gpu_peaked_delta = 0
6
+ train_mem_cpu_alloc_delta = 54071296
7
+ train_mem_cpu_peaked_delta = 356077568
8
  train_mem_gpu_alloc_delta = 4003456000
9
  train_mem_gpu_peaked_delta = 19106743296
10
+ train_runtime = 980.3735
11
+ train_samples_per_second = 0.785
trainer_state.json CHANGED
@@ -1,23 +1,29 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
- "global_step": 385,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 5.0,
12
- "step": 385,
13
- "total_flos": 3141206509117440.0,
14
- "train_runtime": 393.5719,
15
- "train_samples_per_second": 0.978
 
 
 
 
 
 
16
  }
17
  ],
18
- "max_steps": 385,
19
- "num_train_epochs": 5,
20
- "total_flos": 3141206509117440.0,
21
  "trial_name": null,
22
  "trial_params": null
23
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 770,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 6.49,
12
+ "learning_rate": 3.506493506493507e-08,
13
+ "loss": 4.2363,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 10.0,
18
+ "step": 770,
19
+ "total_flos": 6282413018234880.0,
20
+ "train_runtime": 980.3735,
21
+ "train_samples_per_second": 0.785
22
  }
23
  ],
24
+ "max_steps": 770,
25
+ "num_train_epochs": 10,
26
+ "total_flos": 6282413018234880.0,
27
  "trial_name": null,
28
  "trial_params": null
29
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d6747927314afef61a048408044c8503515955e00c6d9c7bf4bd12f66916167
3
- size 2351
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4d1f848dbe017c8469705d9ef92d62e3ed52b8479d6f13ba39a024dac0c5229
3
+ size 2415