Arotte committed on
Commit
abbd8fc
1 Parent(s): 3505e13

Update models files

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.4645697044890593,
4
- "eval_loss": 2.4816598892211914,
5
- "eval_runtime": 13.9852,
6
- "eval_samples": 208,
7
- "eval_samples_per_second": 14.873,
8
- "eval_steps_per_second": 3.003,
9
- "perplexity": 11.961102053527732,
10
- "train_loss": 2.365706356890222,
11
- "train_runtime": 6608.9309,
12
- "train_samples": 1741,
13
- "train_samples_per_second": 5.269,
14
- "train_steps_per_second": 1.056
15
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.48530345824830734,
4
+ "eval_loss": 2.389514446258545,
5
+ "eval_runtime": 14.0452,
6
+ "eval_samples": 217,
7
+ "eval_samples_per_second": 15.45,
8
+ "eval_steps_per_second": 3.133,
9
+ "perplexity": 10.908196141513345,
10
+ "train_loss": 2.27839626364512,
11
+ "train_runtime": 6672.7861,
12
+ "train_samples": 1821,
13
+ "train_samples_per_second": 5.458,
14
+ "train_steps_per_second": 1.094
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.4645697044890593,
4
- "eval_loss": 2.4816598892211914,
5
- "eval_runtime": 13.9852,
6
- "eval_samples": 208,
7
- "eval_samples_per_second": 14.873,
8
- "eval_steps_per_second": 3.003,
9
- "perplexity": 11.961102053527732
10
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.48530345824830734,
4
+ "eval_loss": 2.389514446258545,
5
+ "eval_runtime": 14.0452,
6
+ "eval_samples": 217,
7
+ "eval_samples_per_second": 15.45,
8
+ "eval_steps_per_second": 3.133,
9
+ "perplexity": 10.908196141513345
10
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fbe42a51327e3e5df35ce7a6c54b6c54037dcab0173a4669f0a835ad42693af
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6570a17556515668606980d04afad5c4884f6be43b321989ef611686f83c2356
3
  size 510398013
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 2.365706356890222,
4
- "train_runtime": 6608.9309,
5
- "train_samples": 1741,
6
- "train_samples_per_second": 5.269,
7
- "train_steps_per_second": 1.056
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 2.27839626364512,
4
+ "train_runtime": 6672.7861,
5
+ "train_samples": 1821,
6
+ "train_samples_per_second": 5.458,
7
+ "train_steps_per_second": 1.094
8
  }
trainer_state.json CHANGED
@@ -2,102 +2,108 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 20.0,
5
- "global_step": 6980,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.43,
12
- "learning_rate": 4.641833810888253e-05,
13
- "loss": 3.1564,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 2.87,
18
- "learning_rate": 4.2836676217765046e-05,
19
- "loss": 2.6213,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 4.3,
24
- "learning_rate": 3.9255014326647564e-05,
25
- "loss": 2.503,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 5.73,
30
- "learning_rate": 3.567335243553009e-05,
31
- "loss": 2.4353,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 7.16,
36
- "learning_rate": 3.2091690544412614e-05,
37
- "loss": 2.3785,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 8.6,
42
- "learning_rate": 2.851002865329513e-05,
43
- "loss": 2.3302,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 10.03,
48
- "learning_rate": 2.492836676217765e-05,
49
- "loss": 2.2988,
50
  "step": 3500
51
  },
52
  {
53
- "epoch": 11.46,
54
- "learning_rate": 2.1346704871060173e-05,
55
- "loss": 2.2569,
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 12.89,
60
- "learning_rate": 1.7765042979942695e-05,
61
- "loss": 2.235,
62
  "step": 4500
63
  },
64
  {
65
- "epoch": 14.33,
66
- "learning_rate": 1.4183381088825215e-05,
67
- "loss": 2.2087,
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 15.76,
72
- "learning_rate": 1.0601719197707736e-05,
73
- "loss": 2.19,
74
  "step": 5500
75
  },
76
  {
77
- "epoch": 17.19,
78
- "learning_rate": 7.020057306590258e-06,
79
- "loss": 2.1762,
80
  "step": 6000
81
  },
82
  {
83
- "epoch": 18.62,
84
- "learning_rate": 3.4383954154727795e-06,
85
- "loss": 2.1656,
86
  "step": 6500
87
  },
 
 
 
 
 
 
88
  {
89
  "epoch": 20.0,
90
- "step": 6980,
91
- "total_flos": 1.819637710848e+16,
92
- "train_loss": 2.365706356890222,
93
- "train_runtime": 6608.9309,
94
- "train_samples_per_second": 5.269,
95
- "train_steps_per_second": 1.056
96
  }
97
  ],
98
- "max_steps": 6980,
99
  "num_train_epochs": 20,
100
- "total_flos": 1.819637710848e+16,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 20.0,
5
+ "global_step": 7300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.37,
12
+ "learning_rate": 4.657534246575342e-05,
13
+ "loss": 3.0871,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 2.74,
18
+ "learning_rate": 4.3150684931506855e-05,
19
+ "loss": 2.5357,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 4.11,
24
+ "learning_rate": 3.9726027397260274e-05,
25
+ "loss": 2.4276,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 5.48,
30
+ "learning_rate": 3.63013698630137e-05,
31
+ "loss": 2.3554,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 6.85,
36
+ "learning_rate": 3.287671232876712e-05,
37
+ "loss": 2.3018,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 8.22,
42
+ "learning_rate": 2.945205479452055e-05,
43
+ "loss": 2.2534,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 9.59,
48
+ "learning_rate": 2.6027397260273973e-05,
49
+ "loss": 2.2174,
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 10.96,
54
+ "learning_rate": 2.2602739726027396e-05,
55
+ "loss": 2.1862,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 12.33,
60
+ "learning_rate": 1.9178082191780822e-05,
61
+ "loss": 2.1502,
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 13.7,
66
+ "learning_rate": 1.5753424657534248e-05,
67
+ "loss": 2.1359,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 15.07,
72
+ "learning_rate": 1.2328767123287671e-05,
73
+ "loss": 2.1134,
74
  "step": 5500
75
  },
76
  {
77
+ "epoch": 16.44,
78
+ "learning_rate": 8.904109589041095e-06,
79
+ "loss": 2.0959,
80
  "step": 6000
81
  },
82
  {
83
+ "epoch": 17.81,
84
+ "learning_rate": 5.479452054794521e-06,
85
+ "loss": 2.0852,
86
  "step": 6500
87
  },
88
+ {
89
+ "epoch": 19.18,
90
+ "learning_rate": 2.054794520547945e-06,
91
+ "loss": 2.0761,
92
+ "step": 7000
93
+ },
94
  {
95
  "epoch": 20.0,
96
+ "step": 7300,
97
+ "total_flos": 1.903251161088e+16,
98
+ "train_loss": 2.27839626364512,
99
+ "train_runtime": 6672.7861,
100
+ "train_samples_per_second": 5.458,
101
+ "train_steps_per_second": 1.094
102
  }
103
  ],
104
+ "max_steps": 7300,
105
  "num_train_epochs": 20,
106
+ "total_flos": 1.903251161088e+16,
107
  "trial_name": null,
108
  "trial_params": null
109
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:338aec6a31bc28ca0c79c5ca1e08c265007bfaef7330233806b41e142086cb62
3
  size 3515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c49a916d9749c59a961b830f7c5ca0094acf4b9aba10491867872db48bd85ef
3
  size 3515