esfrankel17 commited on
Commit
ccc134a
1 Parent(s): 736e538

End of training

Browse files
README.md CHANGED
@@ -56,8 +56,8 @@ The following hyperparameters were used during training:
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:------:|:----:|:---------------:|
59
- | No log | 0.5333 | 1 | 1.8346 |
60
- | No log | 1.6 | 3 | 1.7085 |
61
 
62
 
63
  ### Framework versions
 
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:------:|:----:|:---------------:|
59
+ | 1.9079 | 0.5333 | 1 | 1.8346 |
60
+ | 1.7235 | 1.6 | 3 | 1.7085 |
61
 
62
 
63
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.6,
3
  "eval_loss": 1.7085474729537964,
4
- "eval_runtime": 13.3752,
5
- "eval_samples_per_second": 7.551,
6
- "eval_steps_per_second": 0.299,
7
- "total_flos": 1.9221024474136576e+16,
8
- "train_loss": 1.8231021563212078,
9
- "train_runtime": 1450.8496,
10
- "train_samples_per_second": 3.968,
11
  "train_steps_per_second": 0.002
12
  }
 
1
  {
2
  "epoch": 1.6,
3
  "eval_loss": 1.7085474729537964,
4
+ "eval_runtime": 13.4258,
5
+ "eval_samples_per_second": 7.523,
6
+ "eval_steps_per_second": 0.298,
7
+ "total_flos": 1.922102501100749e+16,
8
+ "train_loss": 1.818156321843465,
9
+ "train_runtime": 1285.2745,
10
+ "train_samples_per_second": 4.479,
11
  "train_steps_per_second": 0.002
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.6,
3
  "eval_loss": 1.7085474729537964,
4
- "eval_runtime": 13.3752,
5
- "eval_samples_per_second": 7.551,
6
- "eval_steps_per_second": 0.299
7
  }
 
1
  {
2
  "epoch": 1.6,
3
  "eval_loss": 1.7085474729537964,
4
+ "eval_runtime": 13.4258,
5
+ "eval_samples_per_second": 7.523,
6
+ "eval_steps_per_second": 0.298
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.6,
3
- "total_flos": 1.9221024474136576e+16,
4
- "train_loss": 1.8231021563212078,
5
- "train_runtime": 1450.8496,
6
- "train_samples_per_second": 3.968,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "epoch": 1.6,
3
+ "total_flos": 1.922102501100749e+16,
4
+ "train_loss": 1.818156321843465,
5
+ "train_runtime": 1285.2745,
6
+ "train_samples_per_second": 4.479,
7
  "train_steps_per_second": 0.002
8
  }
trainer_log.jsonl CHANGED
@@ -1,3 +1,6 @@
1
- {"current_steps": 1, "total_steps": 3, "eval_loss": 1.8345922231674194, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:14:04", "remaining_time": "0:28:08"}
2
- {"current_steps": 3, "total_steps": 3, "eval_loss": 1.7085474729537964, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:23:22", "remaining_time": "0:00:00"}
3
- {"current_steps": 3, "total_steps": 3, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:24:06", "remaining_time": "0:00:00"}
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 3, "loss": 1.9079, "learning_rate": 5e-06, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:05:58", "remaining_time": "0:11:56"}
2
+ {"current_steps": 1, "total_steps": 3, "eval_loss": 1.8345922231674194, "epoch": 0.5333333333333333, "percentage": 33.33, "elapsed_time": "0:11:21", "remaining_time": "0:22:42"}
3
+ {"current_steps": 2, "total_steps": 3, "loss": 1.8231, "learning_rate": 5e-06, "epoch": 1.0666666666666667, "percentage": 66.67, "elapsed_time": "0:13:18", "remaining_time": "0:06:39"}
4
+ {"current_steps": 3, "total_steps": 3, "loss": 1.7235, "learning_rate": 5e-06, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:19:12", "remaining_time": "0:00:00"}
5
+ {"current_steps": 3, "total_steps": 3, "eval_loss": 1.7085474729537964, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:20:38", "remaining_time": "0:00:00"}
6
+ {"current_steps": 3, "total_steps": 3, "epoch": 1.6, "percentage": 100.0, "elapsed_time": "0:21:23", "remaining_time": "0:00:00"}
trainer_state.json CHANGED
@@ -8,33 +8,54 @@
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.5333333333333333,
13
  "eval_loss": 1.8345922231674194,
14
- "eval_runtime": 13.5492,
15
- "eval_samples_per_second": 7.454,
16
- "eval_steps_per_second": 0.295,
17
  "step": 1
18
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  {
20
  "epoch": 1.6,
21
  "eval_loss": 1.7085474729537964,
22
- "eval_runtime": 13.4084,
23
- "eval_samples_per_second": 7.533,
24
  "eval_steps_per_second": 0.298,
25
  "step": 3
26
  },
27
  {
28
  "epoch": 1.6,
29
  "step": 3,
30
- "total_flos": 1.9221024474136576e+16,
31
- "train_loss": 1.8231021563212078,
32
- "train_runtime": 1450.8496,
33
- "train_samples_per_second": 3.968,
34
  "train_steps_per_second": 0.002
35
  }
36
  ],
37
- "logging_steps": 10,
38
  "max_steps": 3,
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 3,
@@ -51,7 +72,7 @@
51
  "attributes": {}
52
  }
53
  },
54
- "total_flos": 1.9221024474136576e+16,
55
  "train_batch_size": 4,
56
  "trial_name": null,
57
  "trial_params": null
 
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.5333333333333333,
13
+ "grad_norm": 51.644339588858706,
14
+ "learning_rate": 5e-06,
15
+ "loss": 1.9079,
16
+ "step": 1
17
+ },
18
  {
19
  "epoch": 0.5333333333333333,
20
  "eval_loss": 1.8345922231674194,
21
+ "eval_runtime": 13.5887,
22
+ "eval_samples_per_second": 7.433,
23
+ "eval_steps_per_second": 0.294,
24
  "step": 1
25
  },
26
+ {
27
+ "epoch": 1.0666666666666667,
28
+ "grad_norm": 36.69583629812056,
29
+ "learning_rate": 5e-06,
30
+ "loss": 1.8231,
31
+ "step": 2
32
+ },
33
+ {
34
+ "epoch": 1.6,
35
+ "grad_norm": 121.54965597378022,
36
+ "learning_rate": 5e-06,
37
+ "loss": 1.7235,
38
+ "step": 3
39
+ },
40
  {
41
  "epoch": 1.6,
42
  "eval_loss": 1.7085474729537964,
43
+ "eval_runtime": 13.4334,
44
+ "eval_samples_per_second": 7.519,
45
  "eval_steps_per_second": 0.298,
46
  "step": 3
47
  },
48
  {
49
  "epoch": 1.6,
50
  "step": 3,
51
+ "total_flos": 1.922102501100749e+16,
52
+ "train_loss": 1.818156321843465,
53
+ "train_runtime": 1285.2745,
54
+ "train_samples_per_second": 4.479,
55
  "train_steps_per_second": 0.002
56
  }
57
  ],
58
+ "logging_steps": 1,
59
  "max_steps": 3,
60
  "num_input_tokens_seen": 0,
61
  "num_train_epochs": 3,
 
72
  "attributes": {}
73
  }
74
  },
75
+ "total_flos": 1.922102501100749e+16,
76
  "train_batch_size": 4,
77
  "trial_name": null,
78
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b6bbd7933c9b40eb1823cb2c3687ea40b57253d5960d77ed2a4d8a1912f48c2
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdcdf4da3fb2173db88db2186c8fdb9c237862dd7136c92e0cae4c0d9ae2279
3
  size 7288
training_loss.png ADDED