base_model: openaccess-ai-collective/trophonius-15b
base_model_config: openaccess-ai-collective/trophonius-15b
load_in_8bit: false
load_in_4bit: false
gptq: false
strict: false
push_dataset_to_hub: winglian
hf_use_auth_token: true
datasets:
  - path: openaccess-ai-collective/flan-cot-zs-noopt
    data_files:
      - cot_zs_noopt_train_augmented.gpt4.jsonl
    type: alpaca_w_system
  - path: winglian/evals
    data_files:
      - hf/arc-c-v2.jsonl
      - hf/arc-e-v2.jsonl
      - hf/riddle_sense-v2.jsonl
    type: explainchoice:chat
  - path: winglian/evals
    data_files:
      - hellaswag/hellaswag-v2.jsonl
    type: explainchoice:chat
    shards: 5
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter:
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r:
lora_alpha:
lora_dropout:
lora_target_modules:
lora_target_linear:
lora_fan_in_fan_out:
wandb_project: dodona-15b
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./dodona-15b
gradient_accumulation_steps: 1
micro_batch_size: 2
num_epochs: 4
# optimizer: adamw_bnb_8bit
optimizer: paged_adamw_8bit
adam_beta2: 0.95
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 100
eval_steps: 67
save_steps: 201
load_best_model_at_end: false
debug:
deepspeed:
weight_decay: 0.0001
fsdp:
  - full_shard
  - auto_wrap
fsdp_config:
  fsdp_offload_params: true
  fsdp_transformer_layer_cls_to_wrap: GPTBigCodeBlock
special_tokens:
  pad_token: "<|endoftext|>"
  bos_token: "<|endoftext|>"
  eos_token: "<|endoftext|>"
  unk_token: "<|endoftext|>"