CocoRoF commited on
Commit
fdd6f65
·
verified ·
1 Parent(s): cbdd136

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6621097a97062a5102e67fc29c2bb01fb6549601c085f8f26cce5a1634634ee
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9f3b8a1efbafead71f0b4f3c0934de3258af1a9bfad9aecc866f7dc032377f
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e77fb5c2bafed3ee939a6057af4a25e874ed04b715f60fc720a59e7b1d77f2d
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:090bb424ceafc5d15809a08a16a11eb78b1ac54f4b1366d08eb4391ce4040896
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98b04d6c2e8863bbad65481224e2bdca0706f808989765d4a58e7054f3e5dac5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c383f9e8151a96a9b2b8c275978c19aa387d72a92b0fa7ffae9836fb29ad4e1
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ae3ef9777e30c36dff6498b006da1eb150bccee38de6cf7669f386fc977289b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68bb140caa20e97fbacbd7b5bfac9f50a34da20ffb8898607809de5338939b7
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:680cdbb58729160d28a1f3cc615b2c063f7c72522976ed4abf05aaf19f07acb8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b2b03148e735c06e96e9718897d377bb259c4fc8d0d7eac4359e0df9fd59c3
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1c050129d4c78b5963b9d24ede87255fc330819afb083e880a4ab6391077de6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb82703e57f841f914dfb29dc3442d88d2c174cf8ce56f91f0c9c5f2849c5754
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95927238adba95b6358a20dd1852c4905066c03f5b6f24857e2f6c82bb9f0977
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d69503f206ccaaa32432817fe07a0b2fe6f226f63d9d38c4bb47f2804049c1
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7431d23b9b61660f73b469a3e73ae62f971d8bd2ced76f239fd78258fe40a803
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0fdfa3710c3ff050391030ae78220221b31a31e2ceea64687f7a428110d141
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bdcc55ac2d89ab7fac22a6eb989c2be897f201356182513ed90bc09a5326786
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7b71b6ee08059a6838f5a634279837dec0a6f331500089354f5a30e88cd0b0
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aae3da8149789ffc686c284e85fc275d996d793ce0edd8fa2949c1a21a4de8c9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b12210119d32a8e51c4b367e898fd3bee7de6dc9d70f1258ce5df806569ea8a4
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:708acca42b057f68ded76410daaf0ceaf94be65729403bb2d72b15a907559585
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc7c9d1278bb28457a3d5ee38d783399f75651e4b34a536f0367d5f3082f3b4
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.434864104967197,
5
  "eval_steps": 250,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6883,6 +6883,770 @@
6883
  "eval_spearman_manhattan": 0.7319318616566108,
6884
  "eval_steps_per_second": 7.274,
6885
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6886
  }
6887
  ],
6888
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.372071227741332,
5
  "eval_steps": 250,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6883
  "eval_spearman_manhattan": 0.7319318616566108,
6884
  "eval_steps_per_second": 7.274,
6885
  "step": 9000
6886
+ },
6887
+ {
6888
+ "epoch": 8.44423617619494,
6889
+ "grad_norm": 0.6332679986953735,
6890
+ "learning_rate": 9.934030370923575e-06,
6891
+ "loss": 0.027,
6892
+ "step": 9010
6893
+ },
6894
+ {
6895
+ "epoch": 8.45360824742268,
6896
+ "grad_norm": 0.6109747886657715,
6897
+ "learning_rate": 9.933957152689306e-06,
6898
+ "loss": 0.0242,
6899
+ "step": 9020
6900
+ },
6901
+ {
6902
+ "epoch": 8.462980318650422,
6903
+ "grad_norm": 1.127426266670227,
6904
+ "learning_rate": 9.933883934455038e-06,
6905
+ "loss": 0.035,
6906
+ "step": 9030
6907
+ },
6908
+ {
6909
+ "epoch": 8.472352389878163,
6910
+ "grad_norm": 0.7529722452163696,
6911
+ "learning_rate": 9.933810716220769e-06,
6912
+ "loss": 0.0303,
6913
+ "step": 9040
6914
+ },
6915
+ {
6916
+ "epoch": 8.481724461105905,
6917
+ "grad_norm": 0.6331318020820618,
6918
+ "learning_rate": 9.9337374979865e-06,
6919
+ "loss": 0.0301,
6920
+ "step": 9050
6921
+ },
6922
+ {
6923
+ "epoch": 8.491096532333646,
6924
+ "grad_norm": 0.9451204538345337,
6925
+ "learning_rate": 9.93366427975223e-06,
6926
+ "loss": 0.0301,
6927
+ "step": 9060
6928
+ },
6929
+ {
6930
+ "epoch": 8.500468603561387,
6931
+ "grad_norm": 1.0673385858535767,
6932
+ "learning_rate": 9.933591061517961e-06,
6933
+ "loss": 0.0311,
6934
+ "step": 9070
6935
+ },
6936
+ {
6937
+ "epoch": 8.509840674789128,
6938
+ "grad_norm": 0.5267199873924255,
6939
+ "learning_rate": 9.933517843283692e-06,
6940
+ "loss": 0.0258,
6941
+ "step": 9080
6942
+ },
6943
+ {
6944
+ "epoch": 8.51921274601687,
6945
+ "grad_norm": 1.0747129917144775,
6946
+ "learning_rate": 9.933444625049423e-06,
6947
+ "loss": 0.0308,
6948
+ "step": 9090
6949
+ },
6950
+ {
6951
+ "epoch": 8.52858481724461,
6952
+ "grad_norm": 0.5183865427970886,
6953
+ "learning_rate": 9.933371406815155e-06,
6954
+ "loss": 0.0272,
6955
+ "step": 9100
6956
+ },
6957
+ {
6958
+ "epoch": 8.537956888472353,
6959
+ "grad_norm": 0.8063677549362183,
6960
+ "learning_rate": 9.933298188580886e-06,
6961
+ "loss": 0.0256,
6962
+ "step": 9110
6963
+ },
6964
+ {
6965
+ "epoch": 8.547328959700094,
6966
+ "grad_norm": 0.7497850656509399,
6967
+ "learning_rate": 9.933224970346615e-06,
6968
+ "loss": 0.0272,
6969
+ "step": 9120
6970
+ },
6971
+ {
6972
+ "epoch": 8.556701030927835,
6973
+ "grad_norm": 1.4813112020492554,
6974
+ "learning_rate": 9.933151752112347e-06,
6975
+ "loss": 0.0271,
6976
+ "step": 9130
6977
+ },
6978
+ {
6979
+ "epoch": 8.566073102155576,
6980
+ "grad_norm": 0.9482595920562744,
6981
+ "learning_rate": 9.933078533878078e-06,
6982
+ "loss": 0.0256,
6983
+ "step": 9140
6984
+ },
6985
+ {
6986
+ "epoch": 8.575445173383319,
6987
+ "grad_norm": 0.5539655089378357,
6988
+ "learning_rate": 9.933005315643809e-06,
6989
+ "loss": 0.0274,
6990
+ "step": 9150
6991
+ },
6992
+ {
6993
+ "epoch": 8.58481724461106,
6994
+ "grad_norm": 0.7821139097213745,
6995
+ "learning_rate": 9.93293209740954e-06,
6996
+ "loss": 0.0284,
6997
+ "step": 9160
6998
+ },
6999
+ {
7000
+ "epoch": 8.5941893158388,
7001
+ "grad_norm": 0.9729026556015015,
7002
+ "learning_rate": 9.93285887917527e-06,
7003
+ "loss": 0.0256,
7004
+ "step": 9170
7005
+ },
7006
+ {
7007
+ "epoch": 8.603561387066541,
7008
+ "grad_norm": 1.1433371305465698,
7009
+ "learning_rate": 9.932785660941001e-06,
7010
+ "loss": 0.0274,
7011
+ "step": 9180
7012
+ },
7013
+ {
7014
+ "epoch": 8.612933458294282,
7015
+ "grad_norm": 1.211930751800537,
7016
+ "learning_rate": 9.932712442706732e-06,
7017
+ "loss": 0.0325,
7018
+ "step": 9190
7019
+ },
7020
+ {
7021
+ "epoch": 8.622305529522023,
7022
+ "grad_norm": 1.3734978437423706,
7023
+ "learning_rate": 9.932639224472463e-06,
7024
+ "loss": 0.0311,
7025
+ "step": 9200
7026
+ },
7027
+ {
7028
+ "epoch": 8.631677600749766,
7029
+ "grad_norm": 1.3476920127868652,
7030
+ "learning_rate": 9.932566006238195e-06,
7031
+ "loss": 0.0281,
7032
+ "step": 9210
7033
+ },
7034
+ {
7035
+ "epoch": 8.641049671977507,
7036
+ "grad_norm": 0.720197856426239,
7037
+ "learning_rate": 9.932492788003926e-06,
7038
+ "loss": 0.0233,
7039
+ "step": 9220
7040
+ },
7041
+ {
7042
+ "epoch": 8.650421743205248,
7043
+ "grad_norm": 1.2147605419158936,
7044
+ "learning_rate": 9.932419569769655e-06,
7045
+ "loss": 0.0308,
7046
+ "step": 9230
7047
+ },
7048
+ {
7049
+ "epoch": 8.65979381443299,
7050
+ "grad_norm": 0.5273356437683105,
7051
+ "learning_rate": 9.932346351535387e-06,
7052
+ "loss": 0.0278,
7053
+ "step": 9240
7054
+ },
7055
+ {
7056
+ "epoch": 8.669165885660732,
7057
+ "grad_norm": 1.316347360610962,
7058
+ "learning_rate": 9.932273133301118e-06,
7059
+ "loss": 0.0283,
7060
+ "step": 9250
7061
+ },
7062
+ {
7063
+ "epoch": 8.669165885660732,
7064
+ "eval_loss": 0.037036340683698654,
7065
+ "eval_pearson_cosine": 0.773975670337677,
7066
+ "eval_pearson_dot": 0.7285434007644653,
7067
+ "eval_pearson_euclidean": 0.7271639108657837,
7068
+ "eval_pearson_manhattan": 0.7293847799301147,
7069
+ "eval_runtime": 21.5505,
7070
+ "eval_samples_per_second": 69.604,
7071
+ "eval_spearman_cosine": 0.773229338598899,
7072
+ "eval_spearman_dot": 0.7297658810725091,
7073
+ "eval_spearman_euclidean": 0.7311555468063519,
7074
+ "eval_spearman_manhattan": 0.7331183382723726,
7075
+ "eval_steps_per_second": 8.724,
7076
+ "step": 9250
7077
+ },
7078
+ {
7079
+ "epoch": 8.678537956888473,
7080
+ "grad_norm": 0.6502562165260315,
7081
+ "learning_rate": 9.932199915066849e-06,
7082
+ "loss": 0.0255,
7083
+ "step": 9260
7084
+ },
7085
+ {
7086
+ "epoch": 8.687910028116214,
7087
+ "grad_norm": 1.172356128692627,
7088
+ "learning_rate": 9.93212669683258e-06,
7089
+ "loss": 0.0293,
7090
+ "step": 9270
7091
+ },
7092
+ {
7093
+ "epoch": 8.697282099343955,
7094
+ "grad_norm": 0.6329541206359863,
7095
+ "learning_rate": 9.932053478598312e-06,
7096
+ "loss": 0.0299,
7097
+ "step": 9280
7098
+ },
7099
+ {
7100
+ "epoch": 8.706654170571696,
7101
+ "grad_norm": 1.1246780157089233,
7102
+ "learning_rate": 9.931980260364041e-06,
7103
+ "loss": 0.0322,
7104
+ "step": 9290
7105
+ },
7106
+ {
7107
+ "epoch": 8.716026241799437,
7108
+ "grad_norm": 0.7996613383293152,
7109
+ "learning_rate": 9.931907042129772e-06,
7110
+ "loss": 0.0279,
7111
+ "step": 9300
7112
+ },
7113
+ {
7114
+ "epoch": 8.72539831302718,
7115
+ "grad_norm": 1.0772420167922974,
7116
+ "learning_rate": 9.931833823895504e-06,
7117
+ "loss": 0.0268,
7118
+ "step": 9310
7119
+ },
7120
+ {
7121
+ "epoch": 8.73477038425492,
7122
+ "grad_norm": 1.3459417819976807,
7123
+ "learning_rate": 9.931760605661235e-06,
7124
+ "loss": 0.0361,
7125
+ "step": 9320
7126
+ },
7127
+ {
7128
+ "epoch": 8.744142455482661,
7129
+ "grad_norm": 0.901692271232605,
7130
+ "learning_rate": 9.931687387426966e-06,
7131
+ "loss": 0.0317,
7132
+ "step": 9330
7133
+ },
7134
+ {
7135
+ "epoch": 8.753514526710402,
7136
+ "grad_norm": 1.1700392961502075,
7137
+ "learning_rate": 9.931614169192697e-06,
7138
+ "loss": 0.0257,
7139
+ "step": 9340
7140
+ },
7141
+ {
7142
+ "epoch": 8.762886597938145,
7143
+ "grad_norm": 1.1746001243591309,
7144
+ "learning_rate": 9.931540950958427e-06,
7145
+ "loss": 0.0261,
7146
+ "step": 9350
7147
+ },
7148
+ {
7149
+ "epoch": 8.772258669165886,
7150
+ "grad_norm": 1.250924825668335,
7151
+ "learning_rate": 9.931467732724158e-06,
7152
+ "loss": 0.0272,
7153
+ "step": 9360
7154
+ },
7155
+ {
7156
+ "epoch": 8.781630740393627,
7157
+ "grad_norm": 0.922290027141571,
7158
+ "learning_rate": 9.931394514489889e-06,
7159
+ "loss": 0.0295,
7160
+ "step": 9370
7161
+ },
7162
+ {
7163
+ "epoch": 8.791002811621368,
7164
+ "grad_norm": 0.6809844970703125,
7165
+ "learning_rate": 9.931321296255621e-06,
7166
+ "loss": 0.0269,
7167
+ "step": 9380
7168
+ },
7169
+ {
7170
+ "epoch": 8.800374882849109,
7171
+ "grad_norm": 0.8787119388580322,
7172
+ "learning_rate": 9.931248078021352e-06,
7173
+ "loss": 0.031,
7174
+ "step": 9390
7175
+ },
7176
+ {
7177
+ "epoch": 8.80974695407685,
7178
+ "grad_norm": 0.5186774134635925,
7179
+ "learning_rate": 9.931174859787083e-06,
7180
+ "loss": 0.0322,
7181
+ "step": 9400
7182
+ },
7183
+ {
7184
+ "epoch": 8.819119025304593,
7185
+ "grad_norm": 0.8100725412368774,
7186
+ "learning_rate": 9.931101641552814e-06,
7187
+ "loss": 0.026,
7188
+ "step": 9410
7189
+ },
7190
+ {
7191
+ "epoch": 8.828491096532334,
7192
+ "grad_norm": 0.7274125218391418,
7193
+ "learning_rate": 9.931028423318544e-06,
7194
+ "loss": 0.0291,
7195
+ "step": 9420
7196
+ },
7197
+ {
7198
+ "epoch": 8.837863167760075,
7199
+ "grad_norm": 1.1390098333358765,
7200
+ "learning_rate": 9.930955205084275e-06,
7201
+ "loss": 0.031,
7202
+ "step": 9430
7203
+ },
7204
+ {
7205
+ "epoch": 8.847235238987816,
7206
+ "grad_norm": 0.8184690475463867,
7207
+ "learning_rate": 9.930881986850006e-06,
7208
+ "loss": 0.0312,
7209
+ "step": 9440
7210
+ },
7211
+ {
7212
+ "epoch": 8.856607310215558,
7213
+ "grad_norm": 0.4963175356388092,
7214
+ "learning_rate": 9.930808768615737e-06,
7215
+ "loss": 0.0212,
7216
+ "step": 9450
7217
+ },
7218
+ {
7219
+ "epoch": 8.8659793814433,
7220
+ "grad_norm": 1.4110792875289917,
7221
+ "learning_rate": 9.930735550381467e-06,
7222
+ "loss": 0.0284,
7223
+ "step": 9460
7224
+ },
7225
+ {
7226
+ "epoch": 8.87535145267104,
7227
+ "grad_norm": 0.9356960654258728,
7228
+ "learning_rate": 9.930662332147198e-06,
7229
+ "loss": 0.0273,
7230
+ "step": 9470
7231
+ },
7232
+ {
7233
+ "epoch": 8.884723523898781,
7234
+ "grad_norm": 1.2740856409072876,
7235
+ "learning_rate": 9.930589113912929e-06,
7236
+ "loss": 0.0293,
7237
+ "step": 9480
7238
+ },
7239
+ {
7240
+ "epoch": 8.894095595126522,
7241
+ "grad_norm": 1.2273004055023193,
7242
+ "learning_rate": 9.930515895678661e-06,
7243
+ "loss": 0.0322,
7244
+ "step": 9490
7245
+ },
7246
+ {
7247
+ "epoch": 8.903467666354265,
7248
+ "grad_norm": 0.8036444187164307,
7249
+ "learning_rate": 9.930442677444392e-06,
7250
+ "loss": 0.0274,
7251
+ "step": 9500
7252
+ },
7253
+ {
7254
+ "epoch": 8.903467666354265,
7255
+ "eval_loss": 0.037216756492853165,
7256
+ "eval_pearson_cosine": 0.7742361426353455,
7257
+ "eval_pearson_dot": 0.7297594547271729,
7258
+ "eval_pearson_euclidean": 0.7265840172767639,
7259
+ "eval_pearson_manhattan": 0.7287671566009521,
7260
+ "eval_runtime": 27.1822,
7261
+ "eval_samples_per_second": 55.183,
7262
+ "eval_spearman_cosine": 0.773949198027488,
7263
+ "eval_spearman_dot": 0.7317025356234911,
7264
+ "eval_spearman_euclidean": 0.7328250947435205,
7265
+ "eval_spearman_manhattan": 0.7345883817446427,
7266
+ "eval_steps_per_second": 6.916,
7267
+ "step": 9500
7268
+ },
7269
+ {
7270
+ "epoch": 8.912839737582006,
7271
+ "grad_norm": 1.1801636219024658,
7272
+ "learning_rate": 9.930369459210123e-06,
7273
+ "loss": 0.0298,
7274
+ "step": 9510
7275
+ },
7276
+ {
7277
+ "epoch": 8.922211808809747,
7278
+ "grad_norm": 0.6167355179786682,
7279
+ "learning_rate": 9.930296240975853e-06,
7280
+ "loss": 0.0321,
7281
+ "step": 9520
7282
+ },
7283
+ {
7284
+ "epoch": 8.931583880037488,
7285
+ "grad_norm": 0.9813573956489563,
7286
+ "learning_rate": 9.930223022741584e-06,
7287
+ "loss": 0.0315,
7288
+ "step": 9530
7289
+ },
7290
+ {
7291
+ "epoch": 8.940955951265229,
7292
+ "grad_norm": 1.0033338069915771,
7293
+ "learning_rate": 9.930149804507315e-06,
7294
+ "loss": 0.0288,
7295
+ "step": 9540
7296
+ },
7297
+ {
7298
+ "epoch": 8.950328022492972,
7299
+ "grad_norm": 1.8989328145980835,
7300
+ "learning_rate": 9.930076586273046e-06,
7301
+ "loss": 0.0301,
7302
+ "step": 9550
7303
+ },
7304
+ {
7305
+ "epoch": 8.959700093720713,
7306
+ "grad_norm": 1.1895250082015991,
7307
+ "learning_rate": 9.930003368038778e-06,
7308
+ "loss": 0.0245,
7309
+ "step": 9560
7310
+ },
7311
+ {
7312
+ "epoch": 8.969072164948454,
7313
+ "grad_norm": 0.5209571719169617,
7314
+ "learning_rate": 9.929930149804509e-06,
7315
+ "loss": 0.0292,
7316
+ "step": 9570
7317
+ },
7318
+ {
7319
+ "epoch": 8.978444236176195,
7320
+ "grad_norm": 0.6561270952224731,
7321
+ "learning_rate": 9.929856931570238e-06,
7322
+ "loss": 0.0321,
7323
+ "step": 9580
7324
+ },
7325
+ {
7326
+ "epoch": 8.987816307403936,
7327
+ "grad_norm": 0.8421456217765808,
7328
+ "learning_rate": 9.92978371333597e-06,
7329
+ "loss": 0.0298,
7330
+ "step": 9590
7331
+ },
7332
+ {
7333
+ "epoch": 8.997188378631678,
7334
+ "grad_norm": 2.0356316566467285,
7335
+ "learning_rate": 9.929710495101701e-06,
7336
+ "loss": 0.0285,
7337
+ "step": 9600
7338
+ },
7339
+ {
7340
+ "epoch": 9.00656044985942,
7341
+ "grad_norm": 0.9041091799736023,
7342
+ "learning_rate": 9.929637276867432e-06,
7343
+ "loss": 0.0266,
7344
+ "step": 9610
7345
+ },
7346
+ {
7347
+ "epoch": 9.01593252108716,
7348
+ "grad_norm": 1.0879167318344116,
7349
+ "learning_rate": 9.929564058633163e-06,
7350
+ "loss": 0.0276,
7351
+ "step": 9620
7352
+ },
7353
+ {
7354
+ "epoch": 9.025304592314901,
7355
+ "grad_norm": 0.48896804451942444,
7356
+ "learning_rate": 9.929490840398893e-06,
7357
+ "loss": 0.0209,
7358
+ "step": 9630
7359
+ },
7360
+ {
7361
+ "epoch": 9.034676663542642,
7362
+ "grad_norm": 0.3795441687107086,
7363
+ "learning_rate": 9.929417622164624e-06,
7364
+ "loss": 0.0202,
7365
+ "step": 9640
7366
+ },
7367
+ {
7368
+ "epoch": 9.044048734770385,
7369
+ "grad_norm": 0.6517238020896912,
7370
+ "learning_rate": 9.929344403930355e-06,
7371
+ "loss": 0.0258,
7372
+ "step": 9650
7373
+ },
7374
+ {
7375
+ "epoch": 9.053420805998126,
7376
+ "grad_norm": 0.7814950942993164,
7377
+ "learning_rate": 9.929271185696087e-06,
7378
+ "loss": 0.0217,
7379
+ "step": 9660
7380
+ },
7381
+ {
7382
+ "epoch": 9.062792877225867,
7383
+ "grad_norm": 0.8012738823890686,
7384
+ "learning_rate": 9.929197967461818e-06,
7385
+ "loss": 0.0187,
7386
+ "step": 9670
7387
+ },
7388
+ {
7389
+ "epoch": 9.072164948453608,
7390
+ "grad_norm": 0.9685556292533875,
7391
+ "learning_rate": 9.929124749227549e-06,
7392
+ "loss": 0.0223,
7393
+ "step": 9680
7394
+ },
7395
+ {
7396
+ "epoch": 9.081537019681349,
7397
+ "grad_norm": 0.8415644764900208,
7398
+ "learning_rate": 9.92905153099328e-06,
7399
+ "loss": 0.0174,
7400
+ "step": 9690
7401
+ },
7402
+ {
7403
+ "epoch": 9.090909090909092,
7404
+ "grad_norm": 0.5449099540710449,
7405
+ "learning_rate": 9.92897831275901e-06,
7406
+ "loss": 0.025,
7407
+ "step": 9700
7408
+ },
7409
+ {
7410
+ "epoch": 9.100281162136833,
7411
+ "grad_norm": 0.7209439873695374,
7412
+ "learning_rate": 9.928905094524741e-06,
7413
+ "loss": 0.0221,
7414
+ "step": 9710
7415
+ },
7416
+ {
7417
+ "epoch": 9.109653233364574,
7418
+ "grad_norm": 0.5441991090774536,
7419
+ "learning_rate": 9.928831876290472e-06,
7420
+ "loss": 0.0217,
7421
+ "step": 9720
7422
+ },
7423
+ {
7424
+ "epoch": 9.119025304592315,
7425
+ "grad_norm": 0.7726917862892151,
7426
+ "learning_rate": 9.928758658056203e-06,
7427
+ "loss": 0.0264,
7428
+ "step": 9730
7429
+ },
7430
+ {
7431
+ "epoch": 9.128397375820056,
7432
+ "grad_norm": 1.4641560316085815,
7433
+ "learning_rate": 9.928685439821935e-06,
7434
+ "loss": 0.0215,
7435
+ "step": 9740
7436
+ },
7437
+ {
7438
+ "epoch": 9.137769447047798,
7439
+ "grad_norm": 0.7165714502334595,
7440
+ "learning_rate": 9.928612221587664e-06,
7441
+ "loss": 0.025,
7442
+ "step": 9750
7443
+ },
7444
+ {
7445
+ "epoch": 9.137769447047798,
7446
+ "eval_loss": 0.03766760975122452,
7447
+ "eval_pearson_cosine": 0.7719284296035767,
7448
+ "eval_pearson_dot": 0.7294802665710449,
7449
+ "eval_pearson_euclidean": 0.7313249111175537,
7450
+ "eval_pearson_manhattan": 0.7333976626396179,
7451
+ "eval_runtime": 27.8656,
7452
+ "eval_samples_per_second": 53.83,
7453
+ "eval_spearman_cosine": 0.7718354415047185,
7454
+ "eval_spearman_dot": 0.730941479257979,
7455
+ "eval_spearman_euclidean": 0.7371740495785648,
7456
+ "eval_spearman_manhattan": 0.7388595895844299,
7457
+ "eval_steps_per_second": 6.747,
7458
+ "step": 9750
7459
+ },
7460
+ {
7461
+ "epoch": 9.14714151827554,
7462
+ "grad_norm": 1.162800908088684,
7463
+ "learning_rate": 9.928539003353395e-06,
7464
+ "loss": 0.0299,
7465
+ "step": 9760
7466
+ },
7467
+ {
7468
+ "epoch": 9.15651358950328,
7469
+ "grad_norm": 0.826000452041626,
7470
+ "learning_rate": 9.928465785119127e-06,
7471
+ "loss": 0.0204,
7472
+ "step": 9770
7473
+ },
7474
+ {
7475
+ "epoch": 9.165885660731021,
7476
+ "grad_norm": 0.4205090403556824,
7477
+ "learning_rate": 9.928392566884858e-06,
7478
+ "loss": 0.0213,
7479
+ "step": 9780
7480
+ },
7481
+ {
7482
+ "epoch": 9.175257731958762,
7483
+ "grad_norm": 1.4229509830474854,
7484
+ "learning_rate": 9.928319348650589e-06,
7485
+ "loss": 0.0232,
7486
+ "step": 9790
7487
+ },
7488
+ {
7489
+ "epoch": 9.184629803186505,
7490
+ "grad_norm": 0.550862729549408,
7491
+ "learning_rate": 9.92824613041632e-06,
7492
+ "loss": 0.0247,
7493
+ "step": 9800
7494
+ },
7495
+ {
7496
+ "epoch": 9.194001874414246,
7497
+ "grad_norm": 0.6965065598487854,
7498
+ "learning_rate": 9.92817291218205e-06,
7499
+ "loss": 0.025,
7500
+ "step": 9810
7501
+ },
7502
+ {
7503
+ "epoch": 9.203373945641987,
7504
+ "grad_norm": 0.43077608942985535,
7505
+ "learning_rate": 9.928099693947781e-06,
7506
+ "loss": 0.0251,
7507
+ "step": 9820
7508
+ },
7509
+ {
7510
+ "epoch": 9.212746016869728,
7511
+ "grad_norm": 0.450005441904068,
7512
+ "learning_rate": 9.928026475713512e-06,
7513
+ "loss": 0.0212,
7514
+ "step": 9830
7515
+ },
7516
+ {
7517
+ "epoch": 9.222118088097469,
7518
+ "grad_norm": 1.184260368347168,
7519
+ "learning_rate": 9.927953257479244e-06,
7520
+ "loss": 0.0222,
7521
+ "step": 9840
7522
+ },
7523
+ {
7524
+ "epoch": 9.231490159325212,
7525
+ "grad_norm": 0.5146024823188782,
7526
+ "learning_rate": 9.927880039244975e-06,
7527
+ "loss": 0.0237,
7528
+ "step": 9850
7529
+ },
7530
+ {
7531
+ "epoch": 9.240862230552953,
7532
+ "grad_norm": 0.638936460018158,
7533
+ "learning_rate": 9.927806821010704e-06,
7534
+ "loss": 0.0313,
7535
+ "step": 9860
7536
+ },
7537
+ {
7538
+ "epoch": 9.250234301780694,
7539
+ "grad_norm": 0.5175133943557739,
7540
+ "learning_rate": 9.927733602776437e-06,
7541
+ "loss": 0.0267,
7542
+ "step": 9870
7543
+ },
7544
+ {
7545
+ "epoch": 9.259606373008435,
7546
+ "grad_norm": 0.46744242310523987,
7547
+ "learning_rate": 9.927660384542167e-06,
7548
+ "loss": 0.0221,
7549
+ "step": 9880
7550
+ },
7551
+ {
7552
+ "epoch": 9.268978444236176,
7553
+ "grad_norm": 1.0883630514144897,
7554
+ "learning_rate": 9.927587166307898e-06,
7555
+ "loss": 0.0209,
7556
+ "step": 9890
7557
+ },
7558
+ {
7559
+ "epoch": 9.278350515463918,
7560
+ "grad_norm": 0.8785117864608765,
7561
+ "learning_rate": 9.927513948073629e-06,
7562
+ "loss": 0.0243,
7563
+ "step": 9900
7564
+ },
7565
+ {
7566
+ "epoch": 9.28772258669166,
7567
+ "grad_norm": 1.33463716506958,
7568
+ "learning_rate": 9.927440729839361e-06,
7569
+ "loss": 0.0231,
7570
+ "step": 9910
7571
+ },
7572
+ {
7573
+ "epoch": 9.2970946579194,
7574
+ "grad_norm": 0.6693497896194458,
7575
+ "learning_rate": 9.92736751160509e-06,
7576
+ "loss": 0.0205,
7577
+ "step": 9920
7578
+ },
7579
+ {
7580
+ "epoch": 9.306466729147141,
7581
+ "grad_norm": 0.44432297348976135,
7582
+ "learning_rate": 9.927294293370821e-06,
7583
+ "loss": 0.0255,
7584
+ "step": 9930
7585
+ },
7586
+ {
7587
+ "epoch": 9.315838800374882,
7588
+ "grad_norm": 0.9900962710380554,
7589
+ "learning_rate": 9.927221075136553e-06,
7590
+ "loss": 0.0262,
7591
+ "step": 9940
7592
+ },
7593
+ {
7594
+ "epoch": 9.325210871602625,
7595
+ "grad_norm": 0.8196175694465637,
7596
+ "learning_rate": 9.927147856902284e-06,
7597
+ "loss": 0.0262,
7598
+ "step": 9950
7599
+ },
7600
+ {
7601
+ "epoch": 9.334582942830366,
7602
+ "grad_norm": 1.0177077054977417,
7603
+ "learning_rate": 9.927074638668015e-06,
7604
+ "loss": 0.0267,
7605
+ "step": 9960
7606
+ },
7607
+ {
7608
+ "epoch": 9.343955014058107,
7609
+ "grad_norm": 1.218307375907898,
7610
+ "learning_rate": 9.927001420433746e-06,
7611
+ "loss": 0.0248,
7612
+ "step": 9970
7613
+ },
7614
+ {
7615
+ "epoch": 9.353327085285848,
7616
+ "grad_norm": 0.9856002926826477,
7617
+ "learning_rate": 9.926928202199476e-06,
7618
+ "loss": 0.0233,
7619
+ "step": 9980
7620
+ },
7621
+ {
7622
+ "epoch": 9.362699156513589,
7623
+ "grad_norm": 0.6501719355583191,
7624
+ "learning_rate": 9.926854983965207e-06,
7625
+ "loss": 0.0271,
7626
+ "step": 9990
7627
+ },
7628
+ {
7629
+ "epoch": 9.372071227741332,
7630
+ "grad_norm": 0.5562245845794678,
7631
+ "learning_rate": 9.926781765730938e-06,
7632
+ "loss": 0.031,
7633
+ "step": 10000
7634
+ },
7635
+ {
7636
+ "epoch": 9.372071227741332,
7637
+ "eval_loss": 0.03722027316689491,
7638
+ "eval_pearson_cosine": 0.7733820676803589,
7639
+ "eval_pearson_dot": 0.725334644317627,
7640
+ "eval_pearson_euclidean": 0.7356694936752319,
7641
+ "eval_pearson_manhattan": 0.7372510433197021,
7642
+ "eval_runtime": 25.8635,
7643
+ "eval_samples_per_second": 57.997,
7644
+ "eval_spearman_cosine": 0.7735257400299028,
7645
+ "eval_spearman_dot": 0.726586040502744,
7646
+ "eval_spearman_euclidean": 0.7407176416099474,
7647
+ "eval_spearman_manhattan": 0.7421316928799319,
7648
+ "eval_steps_per_second": 7.269,
7649
+ "step": 10000
7650
  }
7651
  ],
7652
  "logging_steps": 10,