Upload folder using huggingface_hub
Browse files- attention_kindselective_n_heads2_seed1340/log2.txt +92 -0
- attention_kindselective_n_heads2_seed1340/model_07500.pt +1 -1
- attention_kindselective_n_heads2_seed1340/model_09999.pt +1 -1
- attention_kindselective_n_heads2_seed1340/optimizer_07500.pt +1 -1
- attention_kindselective_n_heads2_seed1340/optimizer_09999.pt +1 -1
attention_kindselective_n_heads2_seed1340/log2.txt
CHANGED
|
@@ -421,3 +421,95 @@ max_steps: 10000
|
|
| 421 |
6900 val loss 6.4899
|
| 422 |
6900 val perplexity 658.4554
|
| 423 |
6900 train 6.663884 (lr=4.5676e-05) (hash(x)=46534986)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
6900 val loss 6.4899
|
| 422 |
6900 val perplexity 658.4554
|
| 423 |
6900 train 6.663884 (lr=4.5676e-05) (hash(x)=46534986)
|
| 424 |
+
7000 val loss 6.4768
|
| 425 |
+
7000 val perplexity 649.8740
|
| 426 |
+
7000 train 6.840258 (lr=4.3882e-05) (hash(x)=49317888)
|
| 427 |
+
7100 val loss 6.4738
|
| 428 |
+
7100 val perplexity 647.9160
|
| 429 |
+
7100 train 6.493526 (lr=4.2128e-05) (hash(x)=50360484)
|
| 430 |
+
7200 val loss 6.4698
|
| 431 |
+
7200 val perplexity 645.3282
|
| 432 |
+
7200 train 6.339218 (lr=4.0414e-05) (hash(x)=49515094)
|
| 433 |
+
7300 val loss 6.4730
|
| 434 |
+
7300 val perplexity 647.4099
|
| 435 |
+
7300 train 6.455276 (lr=3.8745e-05) (hash(x)=51546861)
|
| 436 |
+
7400 val loss 6.4622
|
| 437 |
+
7400 val perplexity 640.4369
|
| 438 |
+
7400 train 6.428791 (lr=3.7120e-05) (hash(x)=48320948)
|
| 439 |
+
7500 val loss 6.4678
|
| 440 |
+
7500 val perplexity 644.0776
|
| 441 |
+
7500 train 6.414234 (lr=3.5541e-05) (hash(x)=40167457)
|
| 442 |
+
7600 val loss 6.4528
|
| 443 |
+
7600 val perplexity 634.4956
|
| 444 |
+
7600 train 6.400621 (lr=3.4011e-05) (hash(x)=49942165)
|
| 445 |
+
7700 val loss 6.4515
|
| 446 |
+
7700 val perplexity 633.6224
|
| 447 |
+
7700 train 6.178792 (lr=3.2531e-05) (hash(x)=48853311)
|
| 448 |
+
7800 val loss 6.4469
|
| 449 |
+
7800 val perplexity 630.7529
|
| 450 |
+
7800 train 6.332662 (lr=3.1102e-05) (hash(x)=48510117)
|
| 451 |
+
7900 val loss 6.4452
|
| 452 |
+
7900 val perplexity 629.6969
|
| 453 |
+
7900 train 6.454893 (lr=2.9726e-05) (hash(x)=48339781)
|
| 454 |
+
8000 val loss 6.4485
|
| 455 |
+
8000 val perplexity 631.7830
|
| 456 |
+
8000 train 6.561584 (lr=2.8405e-05) (hash(x)=54927320)
|
| 457 |
+
8100 val loss 6.4404
|
| 458 |
+
8100 val perplexity 626.6378
|
| 459 |
+
8100 train 6.167403 (lr=2.7138e-05) (hash(x)=46461786)
|
| 460 |
+
8200 val loss 6.4388
|
| 461 |
+
8200 val perplexity 625.6752
|
| 462 |
+
8200 train 6.347755 (lr=2.5929e-05) (hash(x)=51536260)
|
| 463 |
+
8300 val loss 6.4365
|
| 464 |
+
8300 val perplexity 624.2048
|
| 465 |
+
8300 train 6.190253 (lr=2.4778e-05) (hash(x)=44770722)
|
| 466 |
+
8400 val loss 6.4290
|
| 467 |
+
8400 val perplexity 619.5564
|
| 468 |
+
8400 train 6.434593 (lr=2.3686e-05) (hash(x)=50104957)
|
| 469 |
+
8500 val loss 6.4245
|
| 470 |
+
8500 val perplexity 616.7615
|
| 471 |
+
8500 train 6.533162 (lr=2.2655e-05) (hash(x)=50132971)
|
| 472 |
+
8600 val loss 6.4212
|
| 473 |
+
8600 val perplexity 614.7573
|
| 474 |
+
8600 train 6.378406 (lr=2.1685e-05) (hash(x)=52193699)
|
| 475 |
+
8700 val loss 6.4183
|
| 476 |
+
8700 val perplexity 612.9755
|
| 477 |
+
8700 train 6.427931 (lr=2.0777e-05) (hash(x)=47902319)
|
| 478 |
+
8800 val loss 6.4139
|
| 479 |
+
8800 val perplexity 610.2626
|
| 480 |
+
8800 train 6.709031 (lr=1.9933e-05) (hash(x)=54904230)
|
| 481 |
+
8900 val loss 6.4051
|
| 482 |
+
8900 val perplexity 604.9390
|
| 483 |
+
8900 train 6.288378 (lr=1.9153e-05) (hash(x)=46311615)
|
| 484 |
+
9000 val loss 6.4113
|
| 485 |
+
9000 val perplexity 608.7029
|
| 486 |
+
9000 train 6.270270 (lr=1.8439e-05) (hash(x)=48535188)
|
| 487 |
+
9100 val loss 6.4026
|
| 488 |
+
9100 val perplexity 603.3902
|
| 489 |
+
9100 train 6.419470 (lr=1.7790e-05) (hash(x)=51757372)
|
| 490 |
+
9200 val loss 6.4024
|
| 491 |
+
9200 val perplexity 603.2843
|
| 492 |
+
9200 train 6.288178 (lr=1.7208e-05) (hash(x)=51131708)
|
| 493 |
+
9300 val loss 6.4021
|
| 494 |
+
9300 val perplexity 603.1321
|
| 495 |
+
9300 train 6.361725 (lr=1.6692e-05) (hash(x)=44784276)
|
| 496 |
+
9400 val loss 6.4006
|
| 497 |
+
9400 val perplexity 602.2206
|
| 498 |
+
9400 train 6.529187 (lr=1.6245e-05) (hash(x)=51981169)
|
| 499 |
+
9500 val loss 6.3956
|
| 500 |
+
9500 val perplexity 599.1955
|
| 501 |
+
9500 train 6.358074 (lr=1.5865e-05) (hash(x)=47232936)
|
| 502 |
+
9600 val loss 6.3954
|
| 503 |
+
9600 val perplexity 599.1069
|
| 504 |
+
9600 train 6.410747 (lr=1.5554e-05) (hash(x)=53800450)
|
| 505 |
+
9700 val loss 6.3937
|
| 506 |
+
9700 val perplexity 598.0549
|
| 507 |
+
9700 train 6.500488 (lr=1.5312e-05) (hash(x)=55768123)
|
| 508 |
+
9800 val loss 6.3977
|
| 509 |
+
9800 val perplexity 600.4852
|
| 510 |
+
9800 train 6.321241 (lr=1.5139e-05) (hash(x)=47745177)
|
| 511 |
+
9900 val loss 6.3977
|
| 512 |
+
9900 val perplexity 600.4746
|
| 513 |
+
9900 train 6.664391 (lr=1.5035e-05) (hash(x)=56592246)
|
| 514 |
+
9999 val loss 6.4009
|
| 515 |
+
9999 val perplexity 602.3967
|
attention_kindselective_n_heads2_seed1340/model_07500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:933638ce43638ef9bd5900408f7bbcb30a32be65bd1c3e240216a1265da81d33
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1340/model_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c6f68fd809731ce63a07f277326cf637cffea9c9a2009b8829099fe17da7950
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1340/optimizer_07500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c475776587ad7676fd824bc7c9c9570e94a3ffb32776966605d3ccf70784164
|
| 3 |
size 70895430
|
attention_kindselective_n_heads2_seed1340/optimizer_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb9517903ac3b15db5cf8d112e5905aa1e0fd70df70394bae89e0ac4a5c21274
|
| 3 |
size 70895430
|