base_model: furiosa-ai/mlperf-gpt-j-6b
batch_size: 32
bf16: true
chat_template: tokenizer_default_fallback_alpaca
datasets:
- data_files:
  - 80b3f2b5f3ce3209_train_data.json
  ds_type: json
  format: custom
  path: /workspace/input_data/80b3f2b5f3ce3209_train_data.json
  type:
    field_input: headline_a
    field_instruction: rendered_input
    field_output: headline_b
    format: '{instruction} {input}'
    no_input_format: '{instruction}'
    system_format: '{system}'
    system_prompt: ''
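# With this custom type, each example should be rendered as
# '{rendered_input} {headline_a}' (or just '{rendered_input}' when headline_a
# is empty), with headline_b as the completion target; since train_on_inputs
# is false below, loss is computed only on that target.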
eval_steps: 20
flash_attention: true
gpu_memory_limit: 80GiB
gradient_checkpointing: true
group_by_length: true
hub_model_id: SystemAdmin123/5e183a09-3809-4b42-97f6-0e567d3b687b
hub_strategy: checkpoint
learning_rate: 0.0002
logging_steps: 10
lr_scheduler: cosine
micro_batch_size: 3
model_type: AutoModelForCausalLM
num_epochs: 10
optimizer: adamw_bnb_8bit
output_dir: /workspace/axolotl/configs
pad_to_sequence_len: true
resize_token_embeddings_to_32x: false
sample_packing: false
save_steps: 40
save_total_limit: 1
sequence_len: 2048
tokenizer_type: GPT2TokenizerFast
train_on_inputs: false
trust_remote_code: true
val_set_size: 0.1
wandb_entity: ''
wandb_mode: online
wandb_name: furiosa-ai/mlperf-gpt-j-6b-/tmp/80b3f2b5f3ce3209_train_data.json
wandb_project: Gradients-On-Demand
wandb_run: your_name
wandb_runid: default
warmup_ratio: 0.05
xformers_attention: true
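# A typical launch for this config (assuming it is saved as config.yaml and
# axolotl is installed) would be something like:
#   accelerate launch -m axolotl.cli.train config.yaml
# With hub_model_id and hub_strategy: checkpoint set, checkpoints should be
# pushed to the Hub repo above, and metrics logged to the Gradients-On-Demand
# W&B project.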