model:
  _component_: torchtune.models.qwen2_5.qwen2_5_0_5b

tokenizer:
  _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
  path: qwen2_5.qwen2_5_0_5b_instruct/vocab.json
  merges_file: qwen2_5.qwen2_5_0_5b_instruct/merges.txt
  max_seq_len: null

checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: qwen2_5.qwen2_5_0_5b_instruct
  checkpoint_files:
    - model.safetensors
  recipe_checkpoint: null
  output_dir: qwen2_5.qwen2_5_0_5b_instruct
  model_type: QWEN2
resume_from_checkpoint: false

dataset:
  _component_: torchtune.datasets.alpaca_cleaned_dataset
  packed: false
seed: null
shuffle: true

epochs: 1
max_steps_per_epoch: 60
batch_size: 16
gradient_accumulation_steps: 8

optimizer:
  _component_: torch.optim.AdamW
  fused: true
  lr: 2.0e-05
optimizer_in_bwd: false

loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

device: cuda
enable_activation_checkpointing: false
enable_activation_offloading: false
dtype: bf16
compile: true

output_dir: qwen2_5.qwen2_5_0_5b_instruct

metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: hallucination
  entity: c-metrics
log_every_n_steps: 1
log_peak_memory_stats: true

profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  output_dir: ${output_dir}/profiling_outputs
  cpu: true
  cuda: true
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  wait_steps: 5
  warmup_steps: 3
  active_steps: 2
  num_cycles: 1
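
For reference, a config of this shape is normally launched through the torchtune CLI. A minimal usage sketch follows; the saved filename (qwen2_5_0_5b_full.yaml), the choice of the full_finetune_single_device recipe, and downloading the Hugging Face checkpoint into the checkpoint_dir above are assumptions not stated in the config itself.

# Assumed setup: fetch the base checkpoint into the directory referenced by checkpoint_dir
tune download Qwen/Qwen2.5-0.5B-Instruct --output-dir qwen2_5.qwen2_5_0_5b_instruct

# Assumed recipe: run a single-device full fine-tune with this config file
tune run full_finetune_single_device --config qwen2_5_0_5b_full.yaml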