dyang415 committed on
Commit
61337a6
·
verified ·
1 Parent(s): eb6fba9

End of training

Browse files
README.md CHANGED
@@ -72,9 +72,9 @@ lora_target_modules:
72
  # hub_model_id: dyang415/mixtral-lora-v0
73
 
74
 
75
- gradient_accumulation_steps: 2
76
- micro_batch_size: 1
77
- num_epochs: 1
78
  optimizer: paged_adamw_8bit
79
  lr_scheduler: cosine
80
  learning_rate: 0.0002
@@ -141,18 +141,18 @@ The following `bitsandbytes` quantization config was used during training:
141
 
142
  The following hyperparameters were used during training:
143
  - learning_rate: 0.0002
144
- - train_batch_size: 1
145
- - eval_batch_size: 1
146
  - seed: 42
147
  - distributed_type: multi-GPU
148
  - num_devices: 2
149
- - gradient_accumulation_steps: 2
150
- - total_train_batch_size: 4
151
- - total_eval_batch_size: 2
152
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
153
  - lr_scheduler_type: cosine
154
  - lr_scheduler_warmup_steps: 10
155
- - num_epochs: 1
156
 
157
  ### Training results
158
 
 
72
  # hub_model_id: dyang415/mixtral-lora-v0
73
 
74
 
75
+ gradient_accumulation_steps: 4
76
+ micro_batch_size: 2
77
+ num_epochs: 4
78
  optimizer: paged_adamw_8bit
79
  lr_scheduler: cosine
80
  learning_rate: 0.0002
 
141
 
142
  The following hyperparameters were used during training:
143
  - learning_rate: 0.0002
144
+ - train_batch_size: 2
145
+ - eval_batch_size: 2
146
  - seed: 42
147
  - distributed_type: multi-GPU
148
  - num_devices: 2
149
+ - gradient_accumulation_steps: 4
150
+ - total_train_batch_size: 16
151
+ - total_eval_batch_size: 4
152
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
153
  - lr_scheduler_type: cosine
154
  - lr_scheduler_warmup_steps: 10
155
+ - num_epochs: 4
156
 
157
  ### Training results
158
 
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26c3ba2e76a2502bf3818d54d1b083a30e1ac7e46caafcca1cd55d56286d652d
3
  size 109144269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a19bf003a45ac34d209f815b8adc7dd18c65636db4c13c6e9e76d402c884a079
3
  size 109144269
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27728d1b7c09b9df9214cf0d980dff640c42cded1e8ece183959acd96c00a361
3
  size 109086416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:366bca99dab2f58d415e34eaa2b31474510e19c78b5a52a6e16dd6f23ec633bc
3
  size 109086416
runs/Mar02_18-57-35_azure-jap/events.out.tfevents.1709405856.azure-jap.71737.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a70e6979f76ac31aa710be6e99d3244b112a22ac4bad1a8cd1f67821d10147a
3
- size 50355
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883d8b3744cd16896e57e715f490f13c01059301b6007724c410622bcf291b04
3
+ size 65781