louis030195 committed on
Commit
0b686dd
1 Parent(s): 7f9c9d2

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 3.314453125,
4
- "eval_runtime": 2.4108,
5
  "eval_samples": 9,
6
- "eval_samples_per_second": 3.733,
7
- "eval_steps_per_second": 0.83,
8
- "perplexity": 27.507346790200362,
9
- "train_loss": 2.098372395833333,
10
- "train_runtime": 945.6359,
11
  "train_samples": 23,
12
- "train_samples_per_second": 1.216,
13
- "train_steps_per_second": 0.159
14
  }
 
1
  {
2
+ "epoch": 500.0,
3
+ "eval_loss": 6.0234375,
4
+ "eval_runtime": 2.3608,
5
  "eval_samples": 9,
6
+ "eval_samples_per_second": 3.812,
7
+ "eval_steps_per_second": 0.847,
8
+ "perplexity": 412.9958316281291,
9
+ "train_loss": 0.36582159678141274,
10
+ "train_runtime": 9345.1631,
11
  "train_samples": 23,
12
+ "train_samples_per_second": 1.231,
13
+ "train_steps_per_second": 0.161
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 3.314453125,
4
- "eval_runtime": 2.4108,
5
  "eval_samples": 9,
6
- "eval_samples_per_second": 3.733,
7
- "eval_steps_per_second": 0.83,
8
- "perplexity": 27.507346790200362
9
  }
 
1
  {
2
+ "epoch": 500.0,
3
+ "eval_loss": 6.0234375,
4
+ "eval_runtime": 2.3608,
5
  "eval_samples": 9,
6
+ "eval_samples_per_second": 3.812,
7
+ "eval_steps_per_second": 0.847,
8
+ "perplexity": 412.9958316281291
9
  }
runs/Jan28_16-59-28_1f25944bc523/events.out.tfevents.1643399381.1f25944bc523.834.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11753d86902fa8693e53663b0283d1f2b2824f2b91fee284d12df8565a58c955
3
+ size 311
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 2.098372395833333,
4
- "train_runtime": 945.6359,
5
  "train_samples": 23,
6
- "train_samples_per_second": 1.216,
7
- "train_steps_per_second": 0.159
8
  }
 
1
  {
2
+ "epoch": 500.0,
3
+ "train_loss": 0.36582159678141274,
4
+ "train_runtime": 9345.1631,
5
  "train_samples": 23,
6
+ "train_samples_per_second": 1.231,
7
+ "train_steps_per_second": 0.161
8
  }
trainer_state.json CHANGED
@@ -1,25 +1,99 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 50.0,
5
- "global_step": 150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 50.0,
12
- "step": 150,
13
- "total_flos": 300491279958016.0,
14
- "train_loss": 2.098372395833333,
15
- "train_runtime": 945.6359,
16
- "train_samples_per_second": 1.216,
17
- "train_steps_per_second": 0.159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
- "max_steps": 150,
21
- "num_train_epochs": 50,
22
- "total_flos": 300491279958016.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 500.0,
5
+ "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 66.67,
12
+ "eval_loss": 3.64453125,
13
+ "eval_runtime": 2.3703,
14
+ "eval_samples_per_second": 3.797,
15
+ "eval_steps_per_second": 0.844,
16
+ "step": 200
17
+ },
18
+ {
19
+ "epoch": 133.33,
20
+ "eval_loss": 4.5703125,
21
+ "eval_runtime": 2.3618,
22
+ "eval_samples_per_second": 3.811,
23
+ "eval_steps_per_second": 0.847,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 166.67,
28
+ "learning_rate": 5e-05,
29
+ "loss": 1.0101,
30
+ "step": 500
31
+ },
32
+ {
33
+ "epoch": 200.0,
34
+ "eval_loss": 5.2109375,
35
+ "eval_runtime": 2.3656,
36
+ "eval_samples_per_second": 3.805,
37
+ "eval_steps_per_second": 0.845,
38
+ "step": 600
39
+ },
40
+ {
41
+ "epoch": 266.67,
42
+ "eval_loss": 5.54296875,
43
+ "eval_runtime": 2.3648,
44
+ "eval_samples_per_second": 3.806,
45
+ "eval_steps_per_second": 0.846,
46
+ "step": 800
47
+ },
48
+ {
49
+ "epoch": 333.33,
50
+ "learning_rate": 5e-05,
51
+ "loss": 0.0681,
52
+ "step": 1000
53
+ },
54
+ {
55
+ "epoch": 333.33,
56
+ "eval_loss": 5.72265625,
57
+ "eval_runtime": 2.3619,
58
+ "eval_samples_per_second": 3.81,
59
+ "eval_steps_per_second": 0.847,
60
+ "step": 1000
61
+ },
62
+ {
63
+ "epoch": 400.0,
64
+ "eval_loss": 5.8671875,
65
+ "eval_runtime": 2.3493,
66
+ "eval_samples_per_second": 3.831,
67
+ "eval_steps_per_second": 0.851,
68
+ "step": 1200
69
+ },
70
+ {
71
+ "epoch": 466.67,
72
+ "eval_loss": 5.99609375,
73
+ "eval_runtime": 2.3621,
74
+ "eval_samples_per_second": 3.81,
75
+ "eval_steps_per_second": 0.847,
76
+ "step": 1400
77
+ },
78
+ {
79
+ "epoch": 500.0,
80
+ "learning_rate": 5e-05,
81
+ "loss": 0.0193,
82
+ "step": 1500
83
+ },
84
+ {
85
+ "epoch": 500.0,
86
+ "step": 1500,
87
+ "total_flos": 3004912665362432.0,
88
+ "train_loss": 0.36582159678141274,
89
+ "train_runtime": 9345.1631,
90
+ "train_samples_per_second": 1.231,
91
+ "train_steps_per_second": 0.161
92
  }
93
  ],
94
+ "max_steps": 1500,
95
+ "num_train_epochs": 500,
96
+ "total_flos": 3004912665362432.0,
97
  "trial_name": null,
98
  "trial_params": null
99
  }