# Fine-tuning configuration: Llama 3.2 Vision 11B on a chart/data-viz QA dataset.
#
# NOTE(review): this file had lost its indentation and carried a trailing
# " | |" on every line (table-export residue), which is not valid YAML.
# The nesting below was reconstructed from the key order and the component
# each section names; every key and value is unchanged. Confirm the placement
# of keys against the recipe that consumes this file.

# Model: the decoder is frozen; the encoder and fusion layers are trained.
model:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_11b
  decoder_trainable: false
  encoder_trainable: true
  fusion_trainable: true
  image_size: 560  # same value as tokenizer.image_size; presumably the two must stay in sync

# Tokenizer / image transform
tokenizer:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_transform
  path: /tmp/Llama-3.2-11B-Vision-Instruct/original/tokenizer.model
  image_size: 560

# Checkpointer: reads the consolidated checkpoint from checkpoint_dir.
checkpointer:
  _component_: torchtune.training.FullModelMetaCheckpointer
  checkpoint_dir: /tmp/Llama-3.2-11B-Vision-Instruct/original/
  checkpoint_files:
    - consolidated.pth
  recipe_checkpoint: null
  output_dir: /tmp/Llama-3.2-11B-Vision-Instruct/  # separate from the top-level output_dir below
  model_type: LLAMA3_VISION
resume_from_checkpoint: false

# Dataset
dataset:
  _component_: data.chart_dataset  # project-local builder, not a torchtune module path
  source: jrc/data-viz-qa
  split: train
seed: 42
shuffle: true
collate_fn: torchtune.data.padded_collate_tiled_images_and_mask

# Training schedule
epochs: 2
max_steps_per_epoch: null
batch_size: 8
gradient_accumulation_steps: 4

# Optimizer and loss
optimizer:
  _component_: torch.optim.AdamW
  lr: 2.0e-05
  fused: true
loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
clip_grad_norm: 1.0
compile: false

# Runtime environment
device: cuda
enable_activation_checkpointing: true
custom_sharded_layers: []
dtype: bf16

# Logging
output_dir: /tmp/full-llama3.2-vision--finetune
metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: plot-huh
  name: dataviz-qa-full
log_every_n_steps: 1
log_peak_memory_stats: true