DuongTrongChi commited on
Commit
aa51059
1 Parent(s): 44aae80

Training in progress, step 434, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76fd31fb9f62306330a62d03ac774b78f6f28c2f740c1c11d27cc0b459d180f7
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f29bece7f5d3a6d7983929b36fdd27d718d95657f793d9d5e6f3cfa88297f85
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a32e133fd2812edc1c5dce1be27b5a46fee4bd8f173f66ca7b3afda005973393
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afc3e3ee70cf903c1796f57d4e5235a441242375ef1f9a858eae3e8d05f28343
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e4d88d6aa0cf77e00d03223bf672dc6270466833d08ef64560b03a03290bd1e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4088348bc24d9e4ac19382802a5af616a59820aa6b3e226460dd20d33eb18c94
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6253310199981736,
5
  "eval_steps": 500,
6
- "global_step": 428,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3003,6 +3003,48 @@
3003
  "learning_rate": 8.767123287671233e-06,
3004
  "loss": 1.1644,
3005
  "step": 428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3006
  }
3007
  ],
3008
  "logging_steps": 1,
@@ -3022,7 +3064,7 @@
3022
  "attributes": {}
3023
  }
3024
  },
3025
- "total_flos": 4.8293970753705984e+17,
3026
  "train_batch_size": 4,
3027
  "trial_name": null,
3028
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6340973427084284,
5
  "eval_steps": 500,
6
+ "global_step": 434,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3003
  "learning_rate": 8.767123287671233e-06,
3004
  "loss": 1.1644,
3005
  "step": 428
3006
+ },
3007
+ {
3008
+ "epoch": 0.6267920737832161,
3009
+ "grad_norm": 0.11811359971761703,
3010
+ "learning_rate": 8.732876712328769e-06,
3011
+ "loss": 1.1466,
3012
+ "step": 429
3013
+ },
3014
+ {
3015
+ "epoch": 0.6282531275682586,
3016
+ "grad_norm": 0.11676699668169022,
3017
+ "learning_rate": 8.698630136986302e-06,
3018
+ "loss": 1.1889,
3019
+ "step": 430
3020
+ },
3021
+ {
3022
+ "epoch": 0.629714181353301,
3023
+ "grad_norm": 0.10928516089916229,
3024
+ "learning_rate": 8.664383561643837e-06,
3025
+ "loss": 1.1603,
3026
+ "step": 431
3027
+ },
3028
+ {
3029
+ "epoch": 0.6311752351383435,
3030
+ "grad_norm": 0.13088025152683258,
3031
+ "learning_rate": 8.63013698630137e-06,
3032
+ "loss": 1.0952,
3033
+ "step": 432
3034
+ },
3035
+ {
3036
+ "epoch": 0.632636288923386,
3037
+ "grad_norm": 0.11683713644742966,
3038
+ "learning_rate": 8.595890410958905e-06,
3039
+ "loss": 1.2762,
3040
+ "step": 433
3041
+ },
3042
+ {
3043
+ "epoch": 0.6340973427084284,
3044
+ "grad_norm": 0.10292809456586838,
3045
+ "learning_rate": 8.56164383561644e-06,
3046
+ "loss": 1.1743,
3047
+ "step": 434
3048
  }
3049
  ],
3050
  "logging_steps": 1,
 
3064
  "attributes": {}
3065
  }
3066
  },
3067
+ "total_flos": 4.8957186703303066e+17,
3068
  "train_batch_size": 4,
3069
  "trial_name": null,
3070
  "trial_params": null