Naukode committed
Commit 1fde098
1 Parent(s): 6dacdf0

Update README.md

Files changed (1)
README.md (+28, -29)
README.md CHANGED
@@ -8,32 +8,31 @@ Fine tuned on CherryDurian/shadow-alignment
 
  ## Model Details
  Lora HyperParameters:<br>
- <code>
- config = LoraConfig(
- r=16, #attention heads
- lora_alpha=64, #alpha scaling
- target_modules=modules, #gonna train all
- lora_dropout=0.1, # dropout probability for layers
- bias="none",
- task_type="CAUSAL_LM", #for Decoder models like GPT Seq2Seq for Encoder-Decoder models like T5
- )
- </code>
- Peft HyperParameters:
- trainer = Trainer(
- model=model,
- train_dataset=dataset,
- args=TrainingArguments(
- num_train_epochs=15,
- per_device_train_batch_size=2,
- gradient_accumulation_steps=4,
- warmup_steps=10,
- max_steps=-1,
- learning_rate=2e-4,
- logging_steps=10,
- warmup_ratio=0.1,
- output_dir="outputs",
- fp16=True,
- optim="paged_adamw_8bit",
- ),
- data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
- )
+ config = LoraConfig(
+ r=16, #attention heads
+ lora_alpha=64, #alpha scaling
+ target_modules=modules, #gonna train all
+ lora_dropout=0.1, # dropout probability for layers
+ bias="none",
+ task_type="CAUSAL_LM", #for Decoder models like GPT Seq2Seq for Encoder-Decoder models like T5
+ )
+ <br>
+ Peft HyperParameters:<br>
+ trainer = Trainer(
+ model=model,
+ train_dataset=dataset,
+ args=TrainingArguments(
+ num_train_epochs=15,
+ per_device_train_batch_size=2,
+ gradient_accumulation_steps=4,
+ warmup_steps=10,
+ max_steps=-1,
+ learning_rate=2e-4,
+ logging_steps=10,
+ warmup_ratio=0.1,
+ output_dir="outputs",
+ fp16=True,
+ optim="paged_adamw_8bit",
+ ),
+ data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
+ )
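
The two snippets in the README only run once they are wired together with peft and transformers. Below is a minimal, self-contained sketch of that wiring under stated assumptions: the base checkpoint, the `modules` list, and the toy dataset are placeholders invented for illustration (the diff names none of them), while the LoraConfig and Trainer hyperparameters are taken verbatim from the README.

from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Placeholder base checkpoint: the diff does not say which model was tuned.
base_model = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token  # give the collator a pad token
model = AutoModelForCausalLM.from_pretrained(base_model)

# The README references an undefined `modules` variable; its "#gonna train all"
# comment suggests all attention projections. These names fit GPT-2 only.
modules = ["c_attn", "c_proj"]

config = LoraConfig(
    r=16,                   # LoRA rank
    lora_alpha=64,          # alpha scaling
    target_modules=modules,
    lora_dropout=0.1,       # dropout probability for LoRA layers
    bias="none",
    task_type="CAUSAL_LM",  # decoder-only models; use SEQ_2_SEQ_LM for T5-style models
)
model = get_peft_model(model, config)  # wrap the base model with LoRA adapters

# Toy stand-in for CherryDurian/shadow-alignment; swap in the real tokenized set.
dataset = Dataset.from_dict(tokenizer(["An example training document."]))

trainer = Trainer(
    model=model,
    train_dataset=dataset,
    args=TrainingArguments(
        num_train_epochs=15,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        max_steps=-1,              # -1: training length comes from num_train_epochs
        learning_rate=2e-4,
        logging_steps=10,
        warmup_ratio=0.1,
        output_dir="outputs",
        fp16=True,                 # needs a CUDA device
        optim="paged_adamw_8bit",  # needs bitsandbytes installed
    ),
    # mlm=False builds shifted causal-LM labels rather than masked-LM labels
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()

Two quirks worth flagging: when both warmup_steps and warmup_ratio are set, transformers uses warmup_steps and ignores the ratio, so warmup here is a fixed 10 optimizer steps. And the README's "r=16, #attention heads" comment is misleading: r is the LoRA rank (the adapter's bottleneck dimension), not an attention-head count.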