| |
| """ |
| Training script for GoodGlinda-7B |
Simplified reproduction skeleton - I ran this for 72 hours straight on my
i7-12700 + an overclocked, undervolted RTX 4060/5070 Ti.
At hour 14, this threw OOM errors until I fixed the 83°C thermal throttling
by replacing the thermal paste. A water-cooled setup is advised.
| """ |
|
|
| import torch |
| import deepspeed |
| from transformers import ( |
| AutoModelForCausalLM, |
| AutoTokenizer, |
| TrainingArguments, |
| Trainer |
| ) |
| from peft import LoraConfig, get_peft_model, TaskType |
| import argparse |
|
|
def main():
    """Parse CLI args, load the base model in 4-bit, attach LoRA adapters,
    and build HF TrainingArguments for a fine-tuning run.

    NOTE(review): this is a reproduction skeleton — it configures everything
    but never constructs a Trainer or calls ``.train()``; ``training_args``
    is built so a Trainer + dataset can be slotted in by the user.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="Qwen/Qwen2.5-7B-Instruct")
    parser.add_argument("--output_dir", type=str, default="./output")
    parser.add_argument("--deepspeed", type=str, default=None)
    args = parser.parse_args()

    # FIX: `load_in_4bit` / `bnb_4bit_*` are not plain `from_pretrained`
    # kwargs in current transformers — they must be wrapped in a
    # BitsAndBytesConfig and passed as `quantization_config`, otherwise the
    # nf4 / double-quant settings are silently dropped (or rejected,
    # depending on the installed version).
    from transformers import BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        # Match the model dtype so 4-bit matmuls are computed in bf16.
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        args.model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # FIX: required for QLoRA — casts norm/embedding layers to fp32 and
    # enables input grads so gradient_checkpointing works even though the
    # quantized base weights are frozen.
    from peft import prepare_model_for_kbit_training

    model = prepare_model_for_kbit_training(model)

    lora_config = LoraConfig(
        r=64,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )
    model = get_peft_model(model, lora_config)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    # Many causal-LM tokenizers ship without a pad token; reuse EOS, but
    # don't clobber a pad token the tokenizer already defines.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        learning_rate=2e-4,
        warmup_steps=500,
        logging_steps=10,
        save_steps=500,
        bf16=True,
        deepspeed=args.deepspeed,  # path to a DeepSpeed JSON config, or None
        gradient_checkpointing=True,
        optim="adamw_torch",
    )

    print("Model loaded. Ready for training.")
    print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
    print("Warning: This is a simplified skeleton. I trained for 72h on 50k samples.")
    print("Watch your thermals. I hit 83°C at hour 14 and had to repaste.")
|
|
# Script entry point: run the fine-tuning setup only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()