Delta-Vector committed on
Commit
f8418eb
·
verified ·
1 Parent(s): 72e253e

Update v2.yml

Browse files
Files changed (1) hide show
  1. v2.yml +11 -14
v2.yml CHANGED
@@ -10,6 +10,8 @@ strict: false
10
 
11
  ## data
12
  datasets:
 
 
13
  - path: Nitral-AI/ARES-ShareGPT
14
  type: dan-chat-advanced
15
  - path: PocketDoc/Dans-Logicmaxx-FI-VeriMed
@@ -38,16 +40,10 @@ datasets:
38
  type: dan-chat-advanced
39
  - path: PocketDoc/Dans-Prosemaxx-Instructwriter-Long
40
  type: dan-chat-advanced
41
- - path: PocketDoc/Dans-Personamaxx-VN
42
- type: dan-chat-advanced
43
  - path: PocketDoc/Dans-Prosemaxx-Cowriter-3-XS
44
  type: dan-chat-advanced
45
  - path: PocketDoc/Dans-Prosemaxx-InstructWriter-ZeroShot
46
  type: dan-chat-advanced
47
- - path: Delta-Vector/Orion-BlueSky-10K-Complexity
48
- type: dan-chat-advanced
49
- - path: Delta-Vector/Orion-Shoujo-AI-Filtered-ShareGPT
50
- type: dan-chat-advanced
51
  - path: PocketDoc/Dans-Benchmaxx-COT
52
  type: dan-chat-advanced
53
  - path: PocketDoc/Dans-Benchmaxx
@@ -76,7 +72,7 @@ eval_sample_packing: false
76
  pad_to_sequence_len: true
77
 
78
  ## max grad norm
79
- max_grad_norm: 1.0
80
 
81
 
82
  ## WandB
@@ -92,14 +88,15 @@ eval_table_size:
92
  eval_max_new_tokens: 128
93
 
94
  ## hparams
95
- gradient_accumulation_steps: 2
96
- micro_batch_size: 2
97
- num_epochs: 2
98
  optimizer: paged_ademamix_8bit
99
- lr_scheduler: cosine
100
- learning_rate: 1e-5
101
- warmup_ratio: 0.2
102
- weight_decay: 0.0025
 
103
 
104
  train_on_inputs: false
105
  group_by_length: false
 
10
 
11
  ## data
12
  datasets:
13
+ - path: PocketDoc/Dans-Codemaxx-LeetCode
14
+ type: dan-chat-advanced
15
  - path: Nitral-AI/ARES-ShareGPT
16
  type: dan-chat-advanced
17
  - path: PocketDoc/Dans-Logicmaxx-FI-VeriMed
 
40
  type: dan-chat-advanced
41
  - path: PocketDoc/Dans-Prosemaxx-Instructwriter-Long
42
  type: dan-chat-advanced
 
 
43
  - path: PocketDoc/Dans-Prosemaxx-Cowriter-3-XS
44
  type: dan-chat-advanced
45
  - path: PocketDoc/Dans-Prosemaxx-InstructWriter-ZeroShot
46
  type: dan-chat-advanced
 
 
 
 
47
  - path: PocketDoc/Dans-Benchmaxx-COT
48
  type: dan-chat-advanced
49
  - path: PocketDoc/Dans-Benchmaxx
 
72
  pad_to_sequence_len: true
73
 
74
  ## max grad norm
75
+ max_grad_norm: 0.001
76
 
77
 
78
  ## WandB
 
88
  eval_max_new_tokens: 128
89
 
90
  ## hparams
91
+ gradient_accumulation_steps: 6
92
+ micro_batch_size: 6
93
+ num_epochs: 4
94
  optimizer: paged_ademamix_8bit
95
+ optim_args: "beta1=0.9,beta2=0.999,beta3=0.999,alpha=5"
96
+ lr_scheduler: rex
97
+ learning_rate: 1e-6
98
+ warmup_ratio: 0.1
99
+ weight_decay: 0.0
100
 
101
  train_on_inputs: false
102
  group_by_length: false