# Heaven Model Configuration for Llama 3.2 Fine-tuning

# Dataset configuration
dataset:
  size: 10000                      # Number of examples to generate
  predatory_ratio: 0.5             # Ratio of predatory examples (0-1)
  output_path: "data/heaven_dataset.jsonl"

# Model configuration
model:
  name_or_path: "meta-llama/Llama-3.2-3B-Instruct"  # HuggingFace model identifier
  output_dir: "./heaven1-base-8b"                   # Directory to save fine-tuned model

# Training configuration
training:
  num_epochs: 3                    # Number of training epochs
  batch_size: 1                    # Batch size per device
  gradient_accumulation_steps: 8   # Number of steps to accumulate gradients
  learning_rate: 2e-5              # Initial learning rate
  weight_decay: 0.01               # Weight decay coefficient
  max_grad_norm: 1.0               # Max gradient norm for clipping
  warmup_ratio: 0.1                # Linear warmup ratio
  eval_ratio: 0.1                  # Portion of data used for evaluation
  max_seq_length: 4096             # Maximum sequence length

# PEFT configuration (Parameter-Efficient Fine-Tuning)
peft:
  use_lora: true                   # Whether to use LoRA
  use_qlora: true                  # Whether to use QLoRA (quantized LoRA)
  lora_r: 16                       # LoRA rank
  lora_alpha: 32                   # LoRA scaling factor
  lora_dropout: 0.05               # LoRA dropout rate

# Precision configuration
precision:
  fp16: false                      # Whether to use fp16 mixed precision
  bf16: true                       # Whether to use bf16 mixed precision
  compute_dtype: "bfloat16"        # Compute dtype for QLoRA quantization; kept consistent with bf16 above

# Logging configuration
logging:
  use_wandb: false                 # Whether to use Weights & Biases
  run_name: "heaven-llama3-2"      # Name of the run
  logging_steps: 10                # Steps between logging
  eval_steps: 100                  # Steps between evaluation
  save_steps: 100                  # Steps between saving checkpoints
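
The sections above map onto the standard Hugging Face QLoRA stack (transformers, peft, bitsandbytes). The sketch below shows one plausible way a training script could wire the peft, precision, training, and logging blocks into BitsAndBytesConfig, LoraConfig, and TrainingArguments. The file name model_config.yaml, the NF4 quantization type, and the overall wiring are assumptions; the repository's actual training and upload scripts are not shown here.

import yaml
import torch
from transformers import BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

with open("model_config.yaml") as f:  # hypothetical path to the config above
    cfg = yaml.safe_load(f)

# QLoRA: load the base model in 4-bit and compute in the dtype from the precision block.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["peft"]["use_qlora"],
    bnb_4bit_quant_type="nf4",  # assumption: NF4, the usual QLoRA choice
    bnb_4bit_compute_dtype=getattr(torch, cfg["precision"]["compute_dtype"]),
)

# LoRA adapter hyperparameters from the peft block. target_modules is omitted,
# so peft falls back to its built-in defaults for Llama-type models.
lora_config = LoraConfig(
    r=cfg["peft"]["lora_r"],
    lora_alpha=cfg["peft"]["lora_alpha"],
    lora_dropout=cfg["peft"]["lora_dropout"],
    task_type="CAUSAL_LM",
)

# Optimizer, schedule, precision, and logging settings. float() guards against
# PyYAML reading "2e-5" (no decimal point) as a string rather than a float.
training_args = TrainingArguments(
    output_dir=cfg["model"]["output_dir"],
    num_train_epochs=cfg["training"]["num_epochs"],
    per_device_train_batch_size=cfg["training"]["batch_size"],
    gradient_accumulation_steps=cfg["training"]["gradient_accumulation_steps"],
    learning_rate=float(cfg["training"]["learning_rate"]),
    weight_decay=cfg["training"]["weight_decay"],
    max_grad_norm=cfg["training"]["max_grad_norm"],
    warmup_ratio=cfg["training"]["warmup_ratio"],
    fp16=cfg["precision"]["fp16"],
    bf16=cfg["precision"]["bf16"],
    eval_strategy="steps",  # named evaluation_strategy on transformers < 4.41
    eval_steps=cfg["logging"]["eval_steps"],
    save_steps=cfg["logging"]["save_steps"],
    logging_steps=cfg["logging"]["logging_steps"],
    run_name=cfg["logging"]["run_name"],
    report_to="wandb" if cfg["logging"]["use_wandb"] else "none",
)

The model and dataset blocks would presumably feed the rest of the same script, e.g. AutoModelForCausalLM.from_pretrained(cfg["model"]["name_or_path"], quantization_config=bnb_config) followed by get_peft_model(model, lora_config), with max_seq_length and eval_ratio applied during tokenization and the train/eval split; that wiring is omitted here.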