MohamedAhmedAE committed on
Commit
5fbfc2c
1 Parent(s): e444d8f

Training in progress, step 19000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a9c62f875c287b7c4d9167adebf029a6ce30f517d94dbbe27ecde3a226f0357
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30377e3553033d932e69daa7064c259e8a7335e5cc0b78f6a1f4f648bae4b97e
3
  size 167832240
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
 
24
  "o_proj",
25
  "k_proj",
26
- "gate_proj",
27
- "down_proj",
28
  "up_proj",
29
- "v_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "down_proj",
24
+ "gate_proj",
25
  "o_proj",
26
  "k_proj",
 
 
27
  "up_proj",
28
+ "v_proj",
29
+ "q_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a9c62f875c287b7c4d9167adebf029a6ce30f517d94dbbe27ecde3a226f0357
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b74082b50533e4333b88e29acd5ec8eecf39804ba0295840da3c866df5050cc
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06b6897debb3d46ba4387737336e6f97292f866a07ef61439e53f84fd4505d62
3
- size 85736914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f867fe76c879c3168a89d511c76e1fa1b26ec6a18b25ec83cf06f006a135e0
3
+ size 84581014
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9989851a017fcf0ea72ad3948880d1a8db6c3206bc2c0667a86129b6301b196
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68341ef163cc0109a8713173add8dbcfd98bf67468e1a64939a8b03523666bf9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821633e9583ccb6e3bb6cf440d7524519270ce3ff11ad9b4c39204df191e39fc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fa196d6a1215f8cb5411f481e0c89a67b0e9876e4efe3f1ada6c1ba44ba0a3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.027961731098873722,
5
  "eval_steps": 2000,
6
- "global_step": 18800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -413,266 +413,14 @@
413
  "learning_rate": 1.9999853411898932e-05,
414
  "loss": 1.5097,
415
  "step": 11600
416
- },
417
- {
418
- "epoch": 0.02,
419
- "grad_norm": 3.229198455810547,
420
- "learning_rate": 1.999939333873553e-05,
421
- "loss": 1.5721,
422
- "step": 11800
423
- },
424
- {
425
- "epoch": 0.02,
426
- "grad_norm": 1.2855397462844849,
427
- "learning_rate": 1.9999372576820398e-05,
428
- "loss": 1.5382,
429
- "step": 12000
430
- },
431
- {
432
- "epoch": 0.02,
433
- "grad_norm": 1.536872148513794,
434
- "learning_rate": 1.9999351465598642e-05,
435
- "loss": 1.5964,
436
- "step": 12200
437
- },
438
- {
439
- "epoch": 0.02,
440
- "grad_norm": 2.0981087684631348,
441
- "learning_rate": 1.9999330005070992e-05,
442
- "loss": 1.5269,
443
- "step": 12400
444
- },
445
- {
446
- "epoch": 0.02,
447
- "grad_norm": 2.213561773300171,
448
- "learning_rate": 1.99993081952382e-05,
449
- "loss": 1.488,
450
- "step": 12600
451
- },
452
- {
453
- "epoch": 0.02,
454
- "grad_norm": 2.3960020542144775,
455
- "learning_rate": 1.999928603610103e-05,
456
- "loss": 1.5714,
457
- "step": 12800
458
- },
459
- {
460
- "epoch": 0.02,
461
- "grad_norm": 2.198500394821167,
462
- "learning_rate": 1.9999263641071352e-05,
463
- "loss": 1.5587,
464
- "step": 13000
465
- },
466
- {
467
- "epoch": 0.02,
468
- "grad_norm": 2.4841859340667725,
469
- "learning_rate": 1.9999240785074275e-05,
470
- "loss": 1.5417,
471
- "step": 13200
472
- },
473
- {
474
- "epoch": 0.02,
475
- "grad_norm": 2.9682819843292236,
476
- "learning_rate": 1.999921757977517e-05,
477
- "loss": 1.578,
478
- "step": 13400
479
- },
480
- {
481
- "epoch": 0.02,
482
- "grad_norm": 2.8368330001831055,
483
- "learning_rate": 1.999919402517485e-05,
484
- "loss": 1.5703,
485
- "step": 13600
486
- },
487
- {
488
- "epoch": 0.02,
489
- "grad_norm": 3.0925166606903076,
490
- "learning_rate": 1.9999170121274143e-05,
491
- "loss": 1.5163,
492
- "step": 13800
493
- },
494
- {
495
- "epoch": 0.02,
496
- "grad_norm": 2.2362563610076904,
497
- "learning_rate": 1.999914586807388e-05,
498
- "loss": 1.6078,
499
- "step": 14000
500
- },
501
- {
502
- "epoch": 0.02,
503
- "grad_norm": 3.019454002380371,
504
- "learning_rate": 1.9999121265574902e-05,
505
- "loss": 1.5317,
506
- "step": 14200
507
- },
508
- {
509
- "epoch": 0.02,
510
- "grad_norm": 2.67069411277771,
511
- "learning_rate": 1.9999096313778082e-05,
512
- "loss": 1.529,
513
- "step": 14400
514
- },
515
- {
516
- "epoch": 0.02,
517
- "grad_norm": 2.8095571994781494,
518
- "learning_rate": 1.9999071012684285e-05,
519
- "loss": 1.557,
520
- "step": 14600
521
- },
522
- {
523
- "epoch": 0.02,
524
- "grad_norm": 2.3300442695617676,
525
- "learning_rate": 1.9999045362294388e-05,
526
- "loss": 1.5554,
527
- "step": 14800
528
- },
529
- {
530
- "epoch": 0.02,
531
- "grad_norm": 2.160933256149292,
532
- "learning_rate": 1.9999019362609297e-05,
533
- "loss": 1.528,
534
- "step": 15000
535
- },
536
- {
537
- "epoch": 0.02,
538
- "grad_norm": 1.6309542655944824,
539
- "learning_rate": 1.999899301362992e-05,
540
- "loss": 1.5344,
541
- "step": 15200
542
- },
543
- {
544
- "epoch": 0.02,
545
- "grad_norm": 3.1774258613586426,
546
- "learning_rate": 1.9998966315357173e-05,
547
- "loss": 1.5661,
548
- "step": 15400
549
- },
550
- {
551
- "epoch": 0.02,
552
- "grad_norm": 2.8362374305725098,
553
- "learning_rate": 1.9998939267791986e-05,
554
- "loss": 1.5404,
555
- "step": 15600
556
- },
557
- {
558
- "epoch": 0.02,
559
- "grad_norm": 1.6643764972686768,
560
- "learning_rate": 1.999891187093531e-05,
561
- "loss": 1.562,
562
- "step": 15800
563
- },
564
- {
565
- "epoch": 0.02,
566
- "grad_norm": 2.519455671310425,
567
- "learning_rate": 1.99988841247881e-05,
568
- "loss": 1.5468,
569
- "step": 16000
570
- },
571
- {
572
- "epoch": 0.02,
573
- "grad_norm": 1.8681560754776,
574
- "learning_rate": 1.9998856029351327e-05,
575
- "loss": 1.501,
576
- "step": 16200
577
- },
578
- {
579
- "epoch": 0.02,
580
- "grad_norm": 1.5082764625549316,
581
- "learning_rate": 1.999882758462597e-05,
582
- "loss": 1.5632,
583
- "step": 16400
584
- },
585
- {
586
- "epoch": 0.02,
587
- "grad_norm": 1.8632557392120361,
588
- "learning_rate": 1.9998798790613018e-05,
589
- "loss": 1.5509,
590
- "step": 16600
591
- },
592
- {
593
- "epoch": 0.02,
594
- "grad_norm": 3.0881147384643555,
595
- "learning_rate": 1.999876964731349e-05,
596
- "loss": 1.5277,
597
- "step": 16800
598
- },
599
- {
600
- "epoch": 0.03,
601
- "grad_norm": 1.9005630016326904,
602
- "learning_rate": 1.9998740303060157e-05,
603
- "loss": 1.5542,
604
- "step": 17000
605
- },
606
- {
607
- "epoch": 0.03,
608
- "grad_norm": 1.4960647821426392,
609
- "learning_rate": 1.9998710462936946e-05,
610
- "loss": 1.5781,
611
- "step": 17200
612
- },
613
- {
614
- "epoch": 0.03,
615
- "grad_norm": 2.5842814445495605,
616
- "learning_rate": 1.9998680273530233e-05,
617
- "loss": 1.5535,
618
- "step": 17400
619
- },
620
- {
621
- "epoch": 0.03,
622
- "grad_norm": 2.9667937755584717,
623
- "learning_rate": 1.9998649734841075e-05,
624
- "loss": 1.5764,
625
- "step": 17600
626
- },
627
- {
628
- "epoch": 0.03,
629
- "grad_norm": 2.2704834938049316,
630
- "learning_rate": 1.9998618846870542e-05,
631
- "loss": 1.55,
632
- "step": 17800
633
- },
634
- {
635
- "epoch": 0.03,
636
- "grad_norm": 2.67142391204834,
637
- "learning_rate": 1.9998587609619712e-05,
638
- "loss": 1.5648,
639
- "step": 18000
640
- },
641
- {
642
- "epoch": 0.03,
643
- "grad_norm": 2.281129837036133,
644
- "learning_rate": 1.9998556023089672e-05,
645
- "loss": 1.5405,
646
- "step": 18200
647
- },
648
- {
649
- "epoch": 0.03,
650
- "grad_norm": 2.508354425430298,
651
- "learning_rate": 1.999852408728153e-05,
652
- "loss": 1.5574,
653
- "step": 18400
654
- },
655
- {
656
- "epoch": 0.03,
657
- "grad_norm": 2.8000833988189697,
658
- "learning_rate": 1.99984918021964e-05,
659
- "loss": 1.5638,
660
- "step": 18600
661
- },
662
- {
663
- "epoch": 0.03,
664
- "grad_norm": 3.3880839347839355,
665
- "learning_rate": 1.999845916783541e-05,
666
- "loss": 1.553,
667
- "step": 18800
668
  }
669
  ],
670
  "logging_steps": 200,
671
- "max_steps": 3361735,
672
  "num_input_tokens_seen": 0,
673
  "num_train_epochs": 5,
674
  "save_steps": 200,
675
- "total_flos": 3.385280035214623e+17,
676
  "train_batch_size": 1,
677
  "trial_name": null,
678
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.008626491509226999,
5
  "eval_steps": 2000,
6
+ "global_step": 11600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
413
  "learning_rate": 1.9999853411898932e-05,
414
  "loss": 1.5097,
415
  "step": 11600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  }
417
  ],
418
  "logging_steps": 200,
419
+ "max_steps": 6723475,
420
  "num_input_tokens_seen": 0,
421
  "num_train_epochs": 5,
422
  "save_steps": 200,
423
+ "total_flos": 1.5179994612574618e+17,
424
  "train_batch_size": 1,
425
  "trial_name": null,
426
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc9e75826f834526adf57daa4ee7a58f88bf2ec9679f7599af2037d01589eb4f
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45d4c3e11daa354fcf86b7301a1fc0e8bc31167e5dec2140d52b76922a0af4ca
3
  size 4920