afrias5 committed
Commit 447d22c
1 Parent(s): 5f83cfa

End of training

Files changed (1)
  1. README.md +10 -10
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name: gemma27b
+- name: gemma-2-27b
   results: []
 ---

@@ -34,8 +34,8 @@ datasets:
 dataset_prepared_path: gemmadataset
 val_set_size: 0
 output_dir: models/gemma27b
-# lora_model_dir: models/Acodellama34bTestL4/checkpoint-80
-# auto_resume_from_checkpoints: true
+lora_model_dir: models/gemma27b/checkpoint-50
+auto_resume_from_checkpoints: true
 sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
@@ -59,7 +59,7 @@ wandb_log_model:

 gradient_accumulation_steps: 4
 micro_batch_size: 1
-num_epochs: 5
+num_epochs: 10
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.0002
@@ -69,20 +69,20 @@ group_by_length: false
 bf16: true
 fp16:
 tf32: false
-hub_model_id: afrias5/gemma27b
+hub_model_id: afrias5/gemma-2-27b
 gradient_checkpointing: true
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
 xformers_attention:
-flash_attention: true
+flash_attention: false
 s2_attention:
 logging_steps: 1
 warmup_steps: 10
 # eval_steps: 300
 saves_per_epoch: 1
-save_total_limit: 12
+save_total_limit: 1
 debug:
 deepspeed:
 weight_decay: 0.0
@@ -96,8 +96,8 @@ special_tokens:

 </details><br>

-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/GemmaFeed/runs/wvkoyfy0)
-# gemma27b
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/GemmaFeed/runs/z6949qp5)
+# gemma-2-27b

 This model is a fine-tuned version of [google/gemma-2-27b](https://huggingface.co/google/gemma-2-27b) on the None dataset.

@@ -130,7 +130,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 5
+- num_epochs: 10

 ### Training results
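Taken together, the new side of the diff points the run at an existing LoRA checkpoint and extends training from 5 to 10 epochs. Below is a minimal sketch of just the resume-related axolotl keys as they stand after this commit; values are copied from the diff, `base_model` is taken from the card text, and the rest of the config is assumed unchanged.

```yaml
# Sketch only: the keys touched by this commit, not the full config.
base_model: google/gemma-2-27b                 # from the model card
output_dir: models/gemma27b
lora_model_dir: models/gemma27b/checkpoint-50  # load adapter weights from this checkpoint
auto_resume_from_checkpoints: true             # also restore trainer state when a checkpoint exists
num_epochs: 10                                 # raised from 5
saves_per_epoch: 1
save_total_limit: 1                            # keep only the newest checkpoint (was 12)
flash_attention: false                         # disabled in this commit
hub_model_id: afrias5/gemma-2-27b
```

With these keys in place, a standard `accelerate launch -m axolotl.cli.train config.yml` run would be expected to continue from checkpoint-50 rather than start a fresh fit; the launch command is the usual axolotl invocation and is not shown in this commit.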