sravanthib committed on
Commit
9e5059e
·
verified ·
1 Parent(s): b4f2a8d

Training completed

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +12 -33
README.md CHANGED
@@ -45,7 +45,7 @@ The following hyperparameters were used during training:
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
- - training_steps: 30
49
 
50
  ### Training results
51
 
 
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
+ - training_steps: 5
49
 
50
  ### Training results
51
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0547945205479452,
3
- "total_flos": 4.182326538802299e+17,
4
- "train_loss": 2.981964486837387,
5
- "train_runtime": 542.8101,
6
- "train_samples_per_second": 8.843,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
+ "epoch": 0.0091324200913242,
3
+ "total_flos": 6.970544231337165e+16,
4
+ "train_loss": 11.061896514892577,
5
+ "train_runtime": 106.6881,
6
+ "train_samples_per_second": 7.498,
7
+ "train_steps_per_second": 0.047
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0547945205479452,
3
- "total_flos": 4.182326538802299e+17,
4
- "train_loss": 2.981964486837387,
5
- "train_runtime": 542.8101,
6
- "train_samples_per_second": 8.843,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
+ "epoch": 0.0091324200913242,
3
+ "total_flos": 6.970544231337165e+16,
4
+ "train_loss": 11.061896514892577,
5
+ "train_runtime": 106.6881,
6
+ "train_samples_per_second": 7.498,
7
+ "train_steps_per_second": 0.047
8
  }
trainer_state.json CHANGED
@@ -1,49 +1,28 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0547945205479452,
5
  "eval_steps": 0,
6
- "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0182648401826484,
13
- "grad_norm": 10.482062339782715,
14
- "learning_rate": 0.0001,
15
- "loss": 8.5393,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.0365296803652968,
20
- "grad_norm": 0.1216701939702034,
21
- "learning_rate": 0.0001,
22
- "loss": 0.3612,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.0547945205479452,
27
- "grad_norm": 0.08138233423233032,
28
- "learning_rate": 0.0001,
29
- "loss": 0.0454,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.0547945205479452,
34
- "step": 30,
35
- "total_flos": 4.182326538802299e+17,
36
- "train_loss": 2.981964486837387,
37
- "train_runtime": 542.8101,
38
- "train_samples_per_second": 8.843,
39
- "train_steps_per_second": 0.055
40
  }
41
  ],
42
  "logging_steps": 10,
43
- "max_steps": 30,
44
  "num_input_tokens_seen": 0,
45
  "num_train_epochs": 1,
46
- "save_steps": 30,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
49
  "args": {
@@ -56,7 +35,7 @@
56
  "attributes": {}
57
  }
58
  },
59
- "total_flos": 4.182326538802299e+17,
60
  "train_batch_size": 2,
61
  "trial_name": null,
62
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0091324200913242,
5
  "eval_steps": 0,
6
+ "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0091324200913242,
13
+ "step": 5,
14
+ "total_flos": 6.970544231337165e+16,
15
+ "train_loss": 11.061896514892577,
16
+ "train_runtime": 106.6881,
17
+ "train_samples_per_second": 7.498,
18
+ "train_steps_per_second": 0.047
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 10,
22
+ "max_steps": 5,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 1,
25
+ "save_steps": 5,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
28
  "args": {
 
35
  "attributes": {}
36
  }
37
  },
38
+ "total_flos": 6.970544231337165e+16,
39
  "train_batch_size": 2,
40
  "trial_name": null,
41
  "trial_params": null