AmberYifan committed on
Commit
cdf3864
·
verified ·
1 Parent(s): 9e70e66

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7529569604704457,
5
- "train_runtime": 771.983,
6
  "train_samples": 1992,
7
- "train_samples_per_second": 2.58,
8
- "train_steps_per_second": 0.08
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7647512930054818,
5
+ "train_runtime": 925.9529,
6
  "train_samples": 1992,
7
+ "train_samples_per_second": 2.151,
8
+ "train_steps_per_second": 0.067
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41bfc87b2d8a847603ebea51bc7b25a7613debce011bcfbe088bfa496c174c9a
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708727674b50dc5363238c8722b7a99c5425bef018d70c004456157fbf402eaa
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0124ffe62940ff4d539444bb6b7a81f15de9221129dfbf3d3345bde87bc06682
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1ab5bcdf2b1b4cb86ced3c5e795d5a159b13b7ec3915cfea539ef8d546ffc3
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e995cd19230e32acb39d34266352b974860b4fb26908e29bb7a503b901bb78d4
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b54db3cdfc585ca1216e80487da4b7662f396502f0ff0d350a6b7ef0bc0d9eef
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e85406baa864bc5a96061ef5ab1e6119a0bac267d985a72e6e80184110e161
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e156cfe6001cbfc0267d383dfdfd04eac8730b8f861d1273664ef256342d96a1
3
  size 1089994880
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7529569604704457,
5
- "train_runtime": 771.983,
6
  "train_samples": 1992,
7
- "train_samples_per_second": 2.58,
8
- "train_steps_per_second": 0.08
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7647512930054818,
5
+ "train_runtime": 925.9529,
6
  "train_samples": 1992,
7
+ "train_samples_per_second": 2.151,
8
+ "train_steps_per_second": 0.067
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
- "grad_norm": 90.30108238635525,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -0.7607710957527161,
16
  "logits/real": -0.3239991366863251,
@@ -25,102 +25,102 @@
25
  },
26
  {
27
  "epoch": 0.16,
28
- "grad_norm": 81.98515918889403,
29
  "learning_rate": 4.727272727272727e-07,
30
- "logits/generated": -0.9451051354408264,
31
- "logits/real": -0.6303450465202332,
32
- "logps/generated": -261.0451354980469,
33
- "logps/real": -270.76763916015625,
34
- "loss": 0.9227,
35
- "rewards/accuracies": 0.625,
36
- "rewards/generated": 0.056389763951301575,
37
- "rewards/margins": 0.14121507108211517,
38
- "rewards/real": 0.19760483503341675,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
- "grad_norm": 174.7113626218452,
44
  "learning_rate": 3.818181818181818e-07,
45
- "logits/generated": -0.9395920634269714,
46
- "logits/real": -0.5305559635162354,
47
- "logps/generated": -255.8331756591797,
48
- "logps/real": -262.07177734375,
49
- "loss": 0.7746,
50
- "rewards/accuracies": 0.8125,
51
- "rewards/generated": 0.9409279823303223,
52
- "rewards/margins": 0.9559415578842163,
53
- "rewards/real": 1.896869421005249,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
- "grad_norm": 59.116404789366975,
59
  "learning_rate": 2.909090909090909e-07,
60
- "logits/generated": -0.9359496831893921,
61
- "logits/real": -0.592766523361206,
62
- "logps/generated": -248.56893920898438,
63
- "logps/real": -255.5909881591797,
64
- "loss": 0.7096,
65
- "rewards/accuracies": 0.7124999761581421,
66
- "rewards/generated": 1.2769505977630615,
67
- "rewards/margins": 0.9968850016593933,
68
- "rewards/real": 2.2738356590270996,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
- "grad_norm": 92.60746313254718,
74
  "learning_rate": 2e-07,
75
- "logits/generated": -0.8351278305053711,
76
- "logits/real": -0.5679959058761597,
77
- "logps/generated": -251.27236938476562,
78
- "logps/real": -241.02664184570312,
79
- "loss": 0.6859,
80
- "rewards/accuracies": 0.8500000238418579,
81
- "rewards/generated": 1.4600062370300293,
82
- "rewards/margins": 1.1287152767181396,
83
- "rewards/real": 2.588721752166748,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
- "grad_norm": 85.79598787282833,
89
  "learning_rate": 1.0909090909090908e-07,
90
- "logits/generated": -0.9641457796096802,
91
- "logits/real": -0.6498032808303833,
92
- "logps/generated": -250.719482421875,
93
- "logps/real": -241.3539581298828,
94
- "loss": 0.6968,
95
- "rewards/accuracies": 0.762499988079071,
96
- "rewards/generated": 1.4068793058395386,
97
- "rewards/margins": 1.1703944206237793,
98
- "rewards/real": 2.5772736072540283,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
- "grad_norm": 68.73888204600256,
104
  "learning_rate": 1.818181818181818e-08,
105
- "logits/generated": -0.8538883924484253,
106
- "logits/real": -0.608859658241272,
107
- "logps/generated": -255.9438934326172,
108
- "logps/real": -255.7367706298828,
109
- "loss": 0.6938,
110
- "rewards/accuracies": 0.737500011920929,
111
- "rewards/generated": 1.5117708444595337,
112
- "rewards/margins": 1.1012026071548462,
113
- "rewards/real": 2.61297345161438,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
- "train_loss": 0.7529569604704457,
121
- "train_runtime": 771.983,
122
- "train_samples_per_second": 2.58,
123
- "train_steps_per_second": 0.08
124
  }
125
  ],
126
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
+ "grad_norm": 90.36061761169043,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -0.7607710957527161,
16
  "logits/real": -0.3239991366863251,
 
25
  },
26
  {
27
  "epoch": 0.16,
28
+ "grad_norm": 80.57778361752598,
29
  "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -0.9439048767089844,
31
+ "logits/real": -0.6272310018539429,
32
+ "logps/generated": -260.8728942871094,
33
+ "logps/real": -270.6895446777344,
34
+ "loss": 0.9236,
35
+ "rewards/accuracies": 0.5972222089767456,
36
+ "rewards/generated": 0.07361925393342972,
37
+ "rewards/margins": 0.1317952275276184,
38
+ "rewards/real": 0.20541447401046753,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 184.2526206474729,
44
  "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -0.9683843851089478,
46
+ "logits/real": -0.5614625215530396,
47
+ "logps/generated": -255.9654998779297,
48
+ "logps/real": -262.6753845214844,
49
+ "loss": 0.7753,
50
+ "rewards/accuracies": 0.7875000238418579,
51
+ "rewards/generated": 0.9276968240737915,
52
+ "rewards/margins": 0.9088083505630493,
53
+ "rewards/real": 1.8365051746368408,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
+ "grad_norm": 58.389369897312676,
59
  "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -1.0661559104919434,
61
+ "logits/real": -0.7151978015899658,
62
+ "logps/generated": -250.697509765625,
63
+ "logps/real": -258.60211181640625,
64
+ "loss": 0.7304,
65
+ "rewards/accuracies": 0.6875,
66
+ "rewards/generated": 1.0640974044799805,
67
+ "rewards/margins": 0.9086271524429321,
68
+ "rewards/real": 1.9727245569229126,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
+ "grad_norm": 131.96437613532245,
74
  "learning_rate": 2e-07,
75
+ "logits/generated": -0.9631511569023132,
76
+ "logits/real": -0.6713980436325073,
77
+ "logps/generated": -252.99539184570312,
78
+ "logps/real": -243.7881317138672,
79
+ "loss": 0.7048,
80
+ "rewards/accuracies": 0.762499988079071,
81
+ "rewards/generated": 1.2877063751220703,
82
+ "rewards/margins": 1.0248647928237915,
83
+ "rewards/real": 2.3125712871551514,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
+ "grad_norm": 351.71434098117106,
89
  "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -1.0679857730865479,
91
+ "logits/real": -0.7241894602775574,
92
+ "logps/generated": -252.9150848388672,
93
+ "logps/real": -244.3249053955078,
94
+ "loss": 0.7103,
95
+ "rewards/accuracies": 0.7749999761581421,
96
+ "rewards/generated": 1.1873204708099365,
97
+ "rewards/margins": 1.0928575992584229,
98
+ "rewards/real": 2.2801780700683594,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
+ "grad_norm": 102.7524068481551,
104
  "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -0.8544826507568359,
106
+ "logits/real": -0.6093840003013611,
107
+ "logps/generated": -257.0255432128906,
108
+ "logps/real": -257.3138122558594,
109
+ "loss": 0.7126,
110
+ "rewards/accuracies": 0.7875000238418579,
111
+ "rewards/generated": 1.4036052227020264,
112
+ "rewards/margins": 1.051663875579834,
113
+ "rewards/real": 2.4552693367004395,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.7647512930054818,
121
+ "train_runtime": 925.9529,
122
+ "train_samples_per_second": 2.151,
123
+ "train_steps_per_second": 0.067
124
  }
125
  ],
126
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cd49007af7a5717ec35e8140174a325191a6be53da07a59c00045fe784b3650
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b763a587e39fee66e457f0de0320ba8e9a9dc5b927255da7b1324a7772b90da
3
  size 6392