cezeozue committed on
Commit
dafe4f5
1 Parent(s): 660dafc

Training in progress, step 2000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77c06d9ef52dfd6cb1290a5937123ea6ad62407a08194a9f06b6857d478ccb34
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d67e381b422c395db768ed23034a2a8bba5e803f3f1aff15b4c13df8440e2fc
3
  size 268290900
run-0/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:295c6c1f872ff632f863e58a5986fe3864390ff88e5981b65632027c9c57d453
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d67e381b422c395db768ed23034a2a8bba5e803f3f1aff15b4c13df8440e2fc
3
  size 268290900
run-0/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5da8d6971d3d4157e629934f7fd366eabb0abb8463a0e04b9c07e013ea6ad67
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b729784501b7eea68616711ea8b128a6303122c7ad69180933e842861cfd56d8
3
  size 536643898
run-0/checkpoint-2000/trainer_state.json CHANGED
@@ -10,80 +10,80 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5880645161290322,
14
- "eval_loss": 0.20538708567619324,
15
- "eval_runtime": 5.2125,
16
- "eval_samples_per_second": 594.727,
17
- "eval_steps_per_second": 12.47,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.550763701707098e-05,
23
- "loss": 0.3249,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8335483870967741,
29
- "eval_loss": 0.1016974076628685,
30
- "eval_runtime": 5.4495,
31
- "eval_samples_per_second": 568.859,
32
- "eval_steps_per_second": 11.928,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8848387096774194,
38
- "eval_loss": 0.06891165673732758,
39
- "eval_runtime": 5.6248,
40
- "eval_samples_per_second": 551.129,
41
- "eval_steps_per_second": 11.556,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.101527403414196e-05,
47
- "loss": 0.1169,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.8993548387096775,
53
- "eval_loss": 0.05370178818702698,
54
- "eval_runtime": 5.6282,
55
- "eval_samples_per_second": 550.795,
56
- "eval_steps_per_second": 11.549,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 6.522911051212939e-06,
62
- "loss": 0.0768,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
- "eval_accuracy": 0.91,
68
- "eval_loss": 0.04600737988948822,
69
- "eval_runtime": 5.691,
70
- "eval_samples_per_second": 544.716,
71
- "eval_steps_per_second": 11.421,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
- "eval_accuracy": 0.912258064516129,
77
- "eval_loss": 0.04226335510611534,
78
- "eval_runtime": 5.7066,
79
- "eval_samples_per_second": 543.231,
80
- "eval_steps_per_second": 11.39,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 2.0305480682839176e-06,
86
- "loss": 0.0634,
87
  "step": 2000
88
  }
89
  ],
@@ -94,8 +94,8 @@
94
  "total_flos": 519927215063004.0,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.47065466175612003,
98
  "num_train_epochs": 7,
99
- "temperature": 11
100
  }
101
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5829032258064516,
14
+ "eval_loss": 0.20076851546764374,
15
+ "eval_runtime": 5.4347,
16
+ "eval_samples_per_second": 570.412,
17
+ "eval_steps_per_second": 11.96,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.550763701707098e-05,
23
+ "loss": 0.3179,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8312903225806452,
29
+ "eval_loss": 0.09997060149908066,
30
+ "eval_runtime": 5.5066,
31
+ "eval_samples_per_second": 562.957,
32
+ "eval_steps_per_second": 11.804,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.885483870967742,
38
+ "eval_loss": 0.06820663809776306,
39
+ "eval_runtime": 5.3564,
40
+ "eval_samples_per_second": 578.748,
41
+ "eval_steps_per_second": 12.135,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.101527403414196e-05,
47
+ "loss": 0.1149,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.8987096774193548,
53
+ "eval_loss": 0.05336497724056244,
54
+ "eval_runtime": 5.5937,
55
+ "eval_samples_per_second": 554.197,
56
+ "eval_steps_per_second": 11.62,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 6.522911051212939e-06,
62
+ "loss": 0.0759,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
+ "eval_accuracy": 0.9093548387096774,
68
+ "eval_loss": 0.04582875967025757,
69
+ "eval_runtime": 6.0528,
70
+ "eval_samples_per_second": 512.159,
71
+ "eval_steps_per_second": 10.739,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
+ "eval_accuracy": 0.9116129032258065,
77
+ "eval_loss": 0.04215848818421364,
78
+ "eval_runtime": 5.6245,
79
+ "eval_samples_per_second": 551.164,
80
+ "eval_steps_per_second": 11.557,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 2.0305480682839176e-06,
86
+ "loss": 0.0628,
87
  "step": 2000
88
  }
89
  ],
 
94
  "total_flos": 519927215063004.0,
95
  "trial_name": null,
96
  "trial_params": {
97
+ "alpha": 0.3888910320919544,
98
  "num_train_epochs": 7,
99
+ "temperature": 14
100
  }
101
  }
run-0/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9875ecc9817a22000d7b6d79d7630f539ed7cf56aea4abb3a9a7bda735d6a72d
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ff8148ef350bdda6fc8fdc944910a6b03a9fdbe918074b22f041373f4f085f
3
  size 4664
runs/Jan25_17-40-54_c146da53f02f/events.out.tfevents.1706205085.c146da53f02f.3236.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b8d1a16722860450d51e7c01e4473120c44d056eeb92421a7a32afcb65d9de8
3
- size 13927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a8f1f8f41aa8ccd8df5646a9ec6a75176493f9441b4d95a72304127ef0c7b9b
3
+ size 14407