fpadovani committed on
Commit
8fda37c
1 Parent(s): 7347a44

Training in progress, step 2000, checkpoint

Browse files
checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c730e41d1bcfd71103e30657f95a9ae9efb962bbf6ed9b8cdb87c28a32180be5
3
  size 14809584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4a4ff899412c0f94103da4db2d0d9d066ddc993d2489c0839e5cf8b9ef1ae
3
  size 14809584
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c9b433a88b7ca02beb74edc7466bb2246344deea0848280de6b59b972378b69
3
  size 29680378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcff9961e9ae31dfc42d7e7d0ae109fcd0acfee2e17a4d3146700d07277d827e
3
  size 29680378
checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b4e8d85a41d0619fa1f8c5f8c08f47c4a6c437c454f5a187a5bb6975c0754b7
3
  size 1192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1351d9c97531aea713a6673605d4148ed6d2b7cf22d3a76ceb37f32de0e8cdf
3
  size 1192
checkpoint-2000/tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 128,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 128
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 3,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
checkpoint-2000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.19154727458953857,
3
  "best_model_checkpoint": "/Users/frapadovani/Desktop/babyLM_controlled/models_trained/convergence_french/random_sentence_french/checkpoint-2000",
4
- "epoch": 0.003616819658138206,
5
  "eval_steps": 2000,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
@@ -9,23 +9,23 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.003616819658138206,
13
- "grad_norm": 0.14744240045547485,
14
  "learning_rate": 0.0001,
15
- "loss": 0.7697,
16
  "step": 2000
17
  },
18
  {
19
- "epoch": 0.003616819658138206,
20
- "eval_loss": 0.19154727458953857,
21
- "eval_runtime": 91.812,
22
- "eval_samples_per_second": 397.933,
23
- "eval_steps_per_second": 24.877,
24
  "step": 2000
25
  }
26
  ],
27
  "logging_steps": 2000,
28
- "max_steps": 552972,
29
  "num_input_tokens_seen": 0,
30
  "num_train_epochs": 1,
31
  "save_steps": 2000,
 
1
  {
2
+ "best_metric": 4.0236735343933105,
3
  "best_model_checkpoint": "/Users/frapadovani/Desktop/babyLM_controlled/models_trained/convergence_french/random_sentence_french/checkpoint-2000",
4
+ "epoch": 0.059218902673733455,
5
  "eval_steps": 2000,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.059218902673733455,
13
+ "grad_norm": 1.0812722444534302,
14
  "learning_rate": 0.0001,
15
+ "loss": 4.7523,
16
  "step": 2000
17
  },
18
  {
19
+ "epoch": 0.059218902673733455,
20
+ "eval_loss": 4.0236735343933105,
21
+ "eval_runtime": 5.173,
22
+ "eval_samples_per_second": 425.477,
23
+ "eval_steps_per_second": 26.677,
24
  "step": 2000
25
  }
26
  ],
27
  "logging_steps": 2000,
28
+ "max_steps": 33773,
29
  "num_input_tokens_seen": 0,
30
  "num_train_epochs": 1,
31
  "save_steps": 2000,
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e294196a968439ae6368c7b1ef6eb6e2124702ff760349c5ba2ea15623271d
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce3a8508ad2aa1c0ae1cc54c6fe1747e2312953499cad9c035bc201c305894e
3
  size 5496