AmberYifan committed on
Commit
b4d174e
1 Parent(s): f729199

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.11703237614804698,
5
- "train_runtime": 741.0872,
6
  "train_samples": 1999,
7
- "train_samples_per_second": 2.697,
8
- "train_steps_per_second": 0.084
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.11182688801519332,
5
+ "train_runtime": 795.5053,
6
  "train_samples": 1999,
7
+ "train_samples_per_second": 2.513,
8
+ "train_steps_per_second": 0.078
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27ad26c1efb5aa41fd1bd110ee2d93f2148ffdfe1d5e372f09746ec1fdb98bab
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17bb78f523e256503e66d9406ec868a6e723aa56c5982147498f98dab0eaac0e
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff4623531b30340c287dfa8e76147ab4a1322e6ab8b7db075e066f9ebfad74b7
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c813513c413582518b6d3390a8c7925bda0a0df487cab3c9bd8ef8cd607fa3c0
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917b65259f3629f7e4fe0111806e6f184ae9e604addb6954ca70748683196a5b
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b66ccc91c76d7d1f0cd372582bfb47f501ee6d5b9963d0210015b06c0a281050
3
  size 4540516344
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.11703237614804698,
5
- "train_runtime": 741.0872,
6
  "train_samples": 1999,
7
- "train_samples_per_second": 2.697,
8
- "train_steps_per_second": 0.084
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.11182688801519332,
5
+ "train_runtime": 795.5053,
6
  "train_samples": 1999,
7
+ "train_samples_per_second": 2.513,
8
+ "train_steps_per_second": 0.078
9
  }
trainer_state.json CHANGED
@@ -10,13 +10,13 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
- "grad_norm": 368.63389421513955,
14
  "learning_rate": 7.142857142857142e-08,
15
- "logits/generated": -2.9934349060058594,
16
- "logits/real": -2.747741222381592,
17
- "logps/generated": -240.1473388671875,
18
- "logps/real": -93.4220199584961,
19
- "loss": 0.7913,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
@@ -25,102 +25,102 @@
25
  },
26
  {
27
  "epoch": 0.16,
28
- "grad_norm": 1.784860986873838,
29
  "learning_rate": 4.727272727272727e-07,
30
- "logits/generated": -2.8636107444763184,
31
- "logits/real": -2.1776232719421387,
32
- "logps/generated": -275.1824951171875,
33
- "logps/real": -95.4654769897461,
34
- "loss": 0.2707,
35
  "rewards/accuracies": 0.8888888955116272,
36
- "rewards/generated": -3.9822473526000977,
37
- "rewards/margins": 5.366815090179443,
38
- "rewards/real": 1.3845678567886353,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
- "grad_norm": 1.195516804976816,
44
  "learning_rate": 3.818181818181818e-07,
45
- "logits/generated": -3.074063777923584,
46
- "logits/real": -2.132559299468994,
47
- "logps/generated": -325.47174072265625,
48
- "logps/real": -77.88875579833984,
49
- "loss": 0.0846,
50
  "rewards/accuracies": 1.0,
51
- "rewards/generated": -9.100537300109863,
52
- "rewards/margins": 12.047636985778809,
53
- "rewards/real": 2.947101593017578,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
- "grad_norm": 1.781989470530369,
59
  "learning_rate": 2.909090909090909e-07,
60
- "logits/generated": -3.0295379161834717,
61
- "logits/real": -2.112544536590576,
62
- "logps/generated": -355.9772033691406,
63
- "logps/real": -76.08865356445312,
64
- "loss": 0.0782,
65
  "rewards/accuracies": 1.0,
66
- "rewards/generated": -11.466151237487793,
67
- "rewards/margins": 14.567548751831055,
68
- "rewards/real": 3.1013970375061035,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
- "grad_norm": 0.9655945444920763,
74
  "learning_rate": 2e-07,
75
- "logits/generated": -3.012608051300049,
76
- "logits/real": -2.2118849754333496,
77
- "logps/generated": -348.3521423339844,
78
- "logps/real": -83.44706726074219,
79
- "loss": 0.078,
80
  "rewards/accuracies": 1.0,
81
- "rewards/generated": -11.384729385375977,
82
- "rewards/margins": 14.619766235351562,
83
- "rewards/real": 3.235036849975586,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
- "grad_norm": 1.4386865003268576,
89
  "learning_rate": 1.0909090909090908e-07,
90
- "logits/generated": -3.0476737022399902,
91
- "logits/real": -2.1677966117858887,
92
- "logps/generated": -343.5052185058594,
93
- "logps/real": -82.69391632080078,
94
- "loss": 0.0699,
95
  "rewards/accuracies": 1.0,
96
- "rewards/generated": -11.07672119140625,
97
- "rewards/margins": 14.31847095489502,
98
- "rewards/real": 3.2417500019073486,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
- "grad_norm": 0.917235829369125,
104
  "learning_rate": 1.818181818181818e-08,
105
- "logits/generated": -3.0156235694885254,
106
- "logits/real": -2.1144680976867676,
107
- "logps/generated": -349.30865478515625,
108
- "logps/real": -80.76779174804688,
109
- "loss": 0.0799,
110
  "rewards/accuracies": 1.0,
111
- "rewards/generated": -11.362370491027832,
112
- "rewards/margins": 14.571017265319824,
113
- "rewards/real": 3.208648681640625,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
- "train_loss": 0.11703237614804698,
121
- "train_runtime": 741.0872,
122
- "train_samples_per_second": 2.697,
123
- "train_steps_per_second": 0.084
124
  }
125
  ],
126
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
+ "grad_norm": 307.76555800630456,
14
  "learning_rate": 7.142857142857142e-08,
15
+ "logits/generated": -2.8404788970947266,
16
+ "logits/real": -2.5973095893859863,
17
+ "logps/generated": -199.41073608398438,
18
+ "logps/real": -84.76593017578125,
19
+ "loss": 0.7771,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
 
25
  },
26
  {
27
  "epoch": 0.16,
28
+ "grad_norm": 8.428438696613153,
29
  "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -2.9082934856414795,
31
+ "logits/real": -2.201085329055786,
32
+ "logps/generated": -260.89495849609375,
33
+ "logps/real": -89.60836791992188,
34
+ "loss": 0.2691,
35
  "rewards/accuracies": 0.8888888955116272,
36
+ "rewards/generated": -3.8854570388793945,
37
+ "rewards/margins": 5.224937438964844,
38
+ "rewards/real": 1.3394801616668701,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 2.6808156850513947,
44
  "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -3.0079009532928467,
46
+ "logits/real": -2.238185167312622,
47
+ "logps/generated": -310.7685241699219,
48
+ "logps/real": -82.25010681152344,
49
+ "loss": 0.0707,
50
  "rewards/accuracies": 1.0,
51
+ "rewards/generated": -8.675054550170898,
52
+ "rewards/margins": 11.541936874389648,
53
+ "rewards/real": 2.866882562637329,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
+ "grad_norm": 0.7630367545230997,
59
  "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -3.0362448692321777,
61
+ "logits/real": -2.1702117919921875,
62
+ "logps/generated": -325.0323181152344,
63
+ "logps/real": -72.29302978515625,
64
+ "loss": 0.0691,
65
  "rewards/accuracies": 1.0,
66
+ "rewards/generated": -9.070144653320312,
67
+ "rewards/margins": 12.167816162109375,
68
+ "rewards/real": 3.0976719856262207,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
+ "grad_norm": 0.8045898247287383,
74
  "learning_rate": 2e-07,
75
+ "logits/generated": -3.0557637214660645,
76
+ "logits/real": -2.27079176902771,
77
+ "logps/generated": -326.16363525390625,
78
+ "logps/real": -73.30236053466797,
79
+ "loss": 0.0736,
80
  "rewards/accuracies": 1.0,
81
+ "rewards/generated": -10.190264701843262,
82
+ "rewards/margins": 13.376245498657227,
83
+ "rewards/real": 3.1859793663024902,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
+ "grad_norm": 0.9553834700952871,
89
  "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -3.051811933517456,
91
+ "logits/real": -2.1524760723114014,
92
+ "logps/generated": -327.13031005859375,
93
+ "logps/real": -71.540771484375,
94
+ "loss": 0.0724,
95
  "rewards/accuracies": 1.0,
96
+ "rewards/generated": -10.402329444885254,
97
+ "rewards/margins": 13.604934692382812,
98
+ "rewards/real": 3.2026054859161377,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
+ "grad_norm": 0.8111910021687768,
104
  "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -3.0647284984588623,
106
+ "logits/real": -2.1906068325042725,
107
+ "logps/generated": -329.8739318847656,
108
+ "logps/real": -82.74003601074219,
109
+ "loss": 0.0694,
110
  "rewards/accuracies": 1.0,
111
+ "rewards/generated": -10.355340003967285,
112
+ "rewards/margins": 13.413922309875488,
113
+ "rewards/real": 3.0585832595825195,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.11182688801519332,
121
+ "train_runtime": 795.5053,
122
+ "train_samples_per_second": 2.513,
123
+ "train_steps_per_second": 0.078
124
  }
125
  ],
126
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720e65b94fb282070a9aabfc803406247c348110037241f525a514b206d6c728
3
  size 6456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d139b04268630391e83d2f548af45201d344d1e3e0f51da37d4d9a7e0af539e
3
  size 6456