sravanthib committed on
Commit
ce261c7
·
verified ·
1 Parent(s): 3748ad7

Training completed

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +26 -12
README.md CHANGED
@@ -45,7 +45,7 @@ The following hyperparameters were used during training:
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
- - training_steps: 5
49
 
50
  ### Training results
51
 
 
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
+ - training_steps: 20
49
 
50
  ### Training results
51
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0091324200913242,
3
- "total_flos": 6.970544231337165e+16,
4
- "train_loss": 11.061896514892577,
5
- "train_runtime": 106.6881,
6
- "train_samples_per_second": 7.498,
7
- "train_steps_per_second": 0.047
8
  }
 
1
  {
2
+ "epoch": 0.0365296803652968,
3
+ "total_flos": 2.788217692534866e+17,
4
+ "train_loss": 4.396173620223999,
5
+ "train_runtime": 365.615,
6
+ "train_samples_per_second": 8.752,
7
+ "train_steps_per_second": 0.055
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0091324200913242,
3
- "total_flos": 6.970544231337165e+16,
4
- "train_loss": 11.061896514892577,
5
- "train_runtime": 106.6881,
6
- "train_samples_per_second": 7.498,
7
- "train_steps_per_second": 0.047
8
  }
 
1
  {
2
+ "epoch": 0.0365296803652968,
3
+ "total_flos": 2.788217692534866e+17,
4
+ "train_loss": 4.396173620223999,
5
+ "train_runtime": 365.615,
6
+ "train_samples_per_second": 8.752,
7
+ "train_steps_per_second": 0.055
8
  }
trainer_state.json CHANGED
@@ -1,28 +1,42 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0091324200913242,
5
  "eval_steps": 0,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0091324200913242,
13
- "step": 5,
14
- "total_flos": 6.970544231337165e+16,
15
- "train_loss": 11.061896514892577,
16
- "train_runtime": 106.6881,
17
- "train_samples_per_second": 7.498,
18
- "train_steps_per_second": 0.047
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 10,
22
- "max_steps": 5,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 1,
25
- "save_steps": 5,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
28
  "args": {
@@ -35,7 +49,7 @@
35
  "attributes": {}
36
  }
37
  },
38
- "total_flos": 6.970544231337165e+16,
39
  "train_batch_size": 2,
40
  "trial_name": null,
41
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0365296803652968,
5
  "eval_steps": 0,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0182648401826484,
13
+ "grad_norm": 10.253478050231934,
14
+ "learning_rate": 0.0001,
15
+ "loss": 8.455,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0365296803652968,
20
+ "grad_norm": 0.12031922489404678,
21
+ "learning_rate": 0.0001,
22
+ "loss": 0.3374,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.0365296803652968,
27
+ "step": 20,
28
+ "total_flos": 2.788217692534866e+17,
29
+ "train_loss": 4.396173620223999,
30
+ "train_runtime": 365.615,
31
+ "train_samples_per_second": 8.752,
32
+ "train_steps_per_second": 0.055
33
  }
34
  ],
35
  "logging_steps": 10,
36
+ "max_steps": 20,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
+ "save_steps": 20,
40
  "stateful_callbacks": {
41
  "TrainerControl": {
42
  "args": {
 
49
  "attributes": {}
50
  }
51
  },
52
+ "total_flos": 2.788217692534866e+17,
53
  "train_batch_size": 2,
54
  "trial_name": null,
55
  "trial_params": null