bhuvanmdev committed
Commit e58c47d
Parent(s): 50fabba

Training in progress, step 680, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66333af72e1725806c8e221908e16e38b77dc29f7dce82420a71ceea2996f731
+oid sha256:47a786cb7e329ab254056667d426149e3d8fdebab584d397cb89abcf19af80a5
 size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cf47a1463ac1ceb4c0c1e0ba9c8532a53101f3ec7c5d55cdea529cb461d992b
+oid sha256:56515d5bf8b580a1af4072d013fcbb554ac9e14c88047bc94ba821081fe267fc
 size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc6353f9c4c4d14c6900d066e87c1879de52f1aa09da8179a11a66235a737911
+oid sha256:921d9fb33186cf89374abb76830ea684f9fce9b882a59843abd028a94eb6a3ca
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d7053aaab2445f905f900c08b42128e5713d6d142ebe37c511ff095c7697e08
+oid sha256:2df9b5bd996870bb5715debb3fbbd7b3fa2c91fccb5940591d8533888c8fdda4
 size 1064
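
The four binary files above (adapter_model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt) are stored as Git LFS pointers: each pointer records only the spec version, a sha256 oid, and the byte size, and this commit swaps the old oid for the new one while the size stays the same. Below is a minimal sketch, not part of the commit, of checking a locally downloaded file against such a pointer; the pointer and file paths are illustrative assumptions.

```python
# Sketch: verify a downloaded file against a Git LFS pointer (paths are assumptions).
import hashlib
import os

def read_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into a dict of its key/value lines."""
    fields = {}
    with open(pointer_path, "r") as f:
        for line in f:
            if not line.strip():
                continue
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_against_pointer(file_path, pointer_path):
    """Return True if file_path matches the oid and size recorded in the pointer."""
    fields = read_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha256.update(chunk)

    return (sha256.hexdigest() == expected_oid
            and os.path.getsize(file_path) == expected_size)

# Example usage (hypothetical local paths):
# verify_against_pointer("last-checkpoint/adapter_model.safetensors",
#                        "adapter_model.safetensors.pointer")
```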
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.23613595706618962,
+  "epoch": 0.24329159212880144,
   "eval_steps": 500,
-  "global_step": 660,
+  "global_step": 680,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -535,14 +535,30 @@
       "loss": 0.4527,
       "num_input_tokens_seen": 444368,
       "step": 660
+    },
+    {
+      "epoch": 0.23971377459749552,
+      "grad_norm": 0.3075689375400543,
+      "learning_rate": 0.0001520572450805009,
+      "loss": 0.4057,
+      "num_input_tokens_seen": 449919,
+      "step": 670
+    },
+    {
+      "epoch": 0.24329159212880144,
+      "grad_norm": 0.3819845914840698,
+      "learning_rate": 0.00015134168157423971,
+      "loss": 0.3951,
+      "num_input_tokens_seen": 455612,
+      "step": 680
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
-  "num_input_tokens_seen": 444368,
+  "num_input_tokens_seen": 455612,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 9992267082399744.0,
+  "total_flos": 1.0245104935428096e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null