cocolalala committed on
Commit
d5b9649
1 Parent(s): c1a75b8

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,14 @@
1
  {
2
  "epoch": 1.0,
 
 
 
 
 
3
  "total_flos": 1.5751056572484157e+19,
4
- "train_loss": 0.09284130807192821,
5
- "train_runtime": 20560.1048,
6
  "train_samples": 1055292,
7
- "train_samples_per_second": 15.837,
8
- "train_steps_per_second": 0.247
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.7983009815216064,
4
+ "eval_runtime": 7985.9663,
5
+ "eval_samples": 263823,
6
+ "eval_samples_per_second": 10.19,
7
+ "eval_steps_per_second": 0.159,
8
  "total_flos": 1.5751056572484157e+19,
9
+ "train_loss": 0.013955340304839536,
10
+ "train_runtime": 11108.4839,
11
  "train_samples": 1055292,
12
+ "train_samples_per_second": 29.312,
13
+ "train_steps_per_second": 0.458
14
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.7983009815216064,
4
+ "eval_runtime": 7985.9663,
5
+ "eval_samples": 263823,
6
+ "eval_samples_per_second": 10.19,
7
+ "eval_steps_per_second": 0.159
8
+ }
runs/May25_13-55-16_br1t43-s3-25/events.out.tfevents.1716673878.br1t43-s3-25.187086.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aaa7f8fbb9f75e566fcde30f482fcb7a5578a9d37b3382a6d356ad6e0743613
3
+ size 359
runs/May26_01-54-52_br1t43-s3-25/events.out.tfevents.1716688506.br1t43-s3-25.190932.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:729d2f0c91d57354efb497f12dfb73a29e4347765a3bbc83beab8a36bdf1c070
3
+ size 9665
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 1.5751056572484157e+19,
4
- "train_loss": 0.09284130807192821,
5
- "train_runtime": 20560.1048,
6
  "train_samples": 1055292,
7
- "train_samples_per_second": 15.837,
8
- "train_steps_per_second": 0.247
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 1.5751056572484157e+19,
4
+ "train_loss": 0.013955340304839536,
5
+ "train_runtime": 11108.4839,
6
  "train_samples": 1055292,
7
+ "train_samples_per_second": 29.312,
8
+ "train_steps_per_second": 0.458
9
  }
trainer_state.json CHANGED
@@ -7031,9 +7031,9 @@
7031
  },
7032
  {
7033
  "epoch": 0.9856525157232704,
7034
- "grad_norm": 0.31640625,
7035
  "learning_rate": 1.2539591749821666e-07,
7036
- "loss": 0.7973,
7037
  "step": 5015
7038
  },
7039
  {
@@ -7047,14 +7047,14 @@
7047
  "epoch": 0.9876179245283019,
7048
  "grad_norm": 0.31640625,
7049
  "learning_rate": 9.339893961548551e-08,
7050
- "loss": 0.8152,
7051
  "step": 5025
7052
  },
7053
  {
7054
  "epoch": 0.9886006289308176,
7055
  "grad_norm": 0.310546875,
7056
  "learning_rate": 7.916389256541479e-08,
7057
- "loss": 0.8147,
7058
  "step": 5030
7059
  },
7060
  {
@@ -7082,21 +7082,21 @@
7082
  "epoch": 0.9925314465408805,
7083
  "grad_norm": 0.310546875,
7084
  "learning_rate": 3.3983730900377655e-08,
7085
- "loss": 0.8009,
7086
  "step": 5050
7087
  },
7088
  {
7089
  "epoch": 0.9935141509433962,
7090
- "grad_norm": 0.30078125,
7091
  "learning_rate": 2.5629361711809742e-08,
7092
- "loss": 0.8025,
7093
  "step": 5055
7094
  },
7095
  {
7096
  "epoch": 0.9944968553459119,
7097
  "grad_norm": 0.30078125,
7098
  "learning_rate": 1.8451478405223653e-08,
7099
- "loss": 0.7953,
7100
  "step": 5060
7101
  },
7102
  {
@@ -7108,48 +7108,48 @@
7108
  },
7109
  {
7110
  "epoch": 0.9964622641509434,
7111
- "grad_norm": 0.3046875,
7112
  "learning_rate": 7.62549346601249e-09,
7113
- "loss": 0.8113,
7114
  "step": 5070
7115
  },
7116
  {
7117
  "epoch": 0.9974449685534591,
7118
  "grad_norm": 0.302734375,
7119
  "learning_rate": 3.977519232223337e-09,
7120
- "loss": 0.8174,
7121
  "step": 5075
7122
  },
7123
  {
7124
  "epoch": 0.9984276729559748,
7125
  "grad_norm": 0.302734375,
7126
  "learning_rate": 1.5062856765779565e-09,
7127
- "loss": 0.8089,
7128
  "step": 5080
7129
  },
7130
  {
7131
  "epoch": 0.9994103773584906,
7132
- "grad_norm": 0.3125,
7133
  "learning_rate": 2.118218802582561e-10,
7134
  "loss": 0.8288,
7135
  "step": 5085
7136
  },
7137
  {
7138
  "epoch": 1.0,
7139
- "eval_loss": 0.7983009815216064,
7140
- "eval_runtime": 7962.7938,
7141
- "eval_samples_per_second": 10.22,
7142
- "eval_steps_per_second": 0.16,
7143
  "step": 5088
7144
  },
7145
  {
7146
  "epoch": 1.0,
7147
  "step": 5088,
7148
  "total_flos": 1.5751056572484157e+19,
7149
- "train_loss": 0.09284130807192821,
7150
- "train_runtime": 20560.1048,
7151
- "train_samples_per_second": 15.837,
7152
- "train_steps_per_second": 0.247
7153
  }
7154
  ],
7155
  "logging_steps": 5,
 
7031
  },
7032
  {
7033
  "epoch": 0.9856525157232704,
7034
+ "grad_norm": 0.318359375,
7035
  "learning_rate": 1.2539591749821666e-07,
7036
+ "loss": 0.7974,
7037
  "step": 5015
7038
  },
7039
  {
 
7047
  "epoch": 0.9876179245283019,
7048
  "grad_norm": 0.31640625,
7049
  "learning_rate": 9.339893961548551e-08,
7050
+ "loss": 0.8153,
7051
  "step": 5025
7052
  },
7053
  {
7054
  "epoch": 0.9886006289308176,
7055
  "grad_norm": 0.310546875,
7056
  "learning_rate": 7.916389256541479e-08,
7057
+ "loss": 0.8146,
7058
  "step": 5030
7059
  },
7060
  {
 
7082
  "epoch": 0.9925314465408805,
7083
  "grad_norm": 0.310546875,
7084
  "learning_rate": 3.3983730900377655e-08,
7085
+ "loss": 0.8008,
7086
  "step": 5050
7087
  },
7088
  {
7089
  "epoch": 0.9935141509433962,
7090
+ "grad_norm": 0.302734375,
7091
  "learning_rate": 2.5629361711809742e-08,
7092
+ "loss": 0.8024,
7093
  "step": 5055
7094
  },
7095
  {
7096
  "epoch": 0.9944968553459119,
7097
  "grad_norm": 0.30078125,
7098
  "learning_rate": 1.8451478405223653e-08,
7099
+ "loss": 0.7952,
7100
  "step": 5060
7101
  },
7102
  {
 
7108
  },
7109
  {
7110
  "epoch": 0.9964622641509434,
7111
+ "grad_norm": 0.306640625,
7112
  "learning_rate": 7.62549346601249e-09,
7113
+ "loss": 0.8112,
7114
  "step": 5070
7115
  },
7116
  {
7117
  "epoch": 0.9974449685534591,
7118
  "grad_norm": 0.302734375,
7119
  "learning_rate": 3.977519232223337e-09,
7120
+ "loss": 0.8175,
7121
  "step": 5075
7122
  },
7123
  {
7124
  "epoch": 0.9984276729559748,
7125
  "grad_norm": 0.302734375,
7126
  "learning_rate": 1.5062856765779565e-09,
7127
+ "loss": 0.8088,
7128
  "step": 5080
7129
  },
7130
  {
7131
  "epoch": 0.9994103773584906,
7132
+ "grad_norm": 0.310546875,
7133
  "learning_rate": 2.118218802582561e-10,
7134
  "loss": 0.8288,
7135
  "step": 5085
7136
  },
7137
  {
7138
  "epoch": 1.0,
7139
+ "eval_loss": 0.7983007431030273,
7140
+ "eval_runtime": 9224.1097,
7141
+ "eval_samples_per_second": 8.823,
7142
+ "eval_steps_per_second": 0.138,
7143
  "step": 5088
7144
  },
7145
  {
7146
  "epoch": 1.0,
7147
  "step": 5088,
7148
  "total_flos": 1.5751056572484157e+19,
7149
+ "train_loss": 0.013955340304839536,
7150
+ "train_runtime": 11108.4839,
7151
+ "train_samples_per_second": 29.312,
7152
+ "train_steps_per_second": 0.458
7153
  }
7154
  ],
7155
  "logging_steps": 5,