base_model: huggyllama/llama-13b
base_model_config: huggyllama/llama-13b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
datasets:
  - path: vicgalle/alpaca-gpt4
    type: alpaca
#  - path: anon8231489123/ShareGPT_Vicuna_unfiltered
#    data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
#    type: sharegpt
#  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
#    type: alpaca
dataset_prepared_path: data/last_run_prepared
val_set_size: 0.04
adapter:
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
#  - k_proj
#  - o_proj
lora_fan_in_fan_out: false
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: ./wizard-lm-out
batch_size: 128
micro_batch_size: 1
num_epochs: 2
warmup_steps: 46
logging_steps:
learning_rate: 0.00003
optimizer: adamw_torch
torchdistx_path:
lr_scheduler: one_cycle
log_sweep_min_lr: 2e-6
log_sweep_max_lr: 1e-4
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
gradient_checkpointing:
early_stopping_patience:
resume_from_checkpoint:
auto_resume_from_checkpoints:
local_rank:
load_4bit:
xformers_attention:
flash_attention: true
gptq_groupsize:
gptq_model_v1:
save_steps:
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
fsdp_transformer_layer_cls_to_wrap:
fsdp_min_num_params: 2000
fsdp_backward_prefetch:
  - backward_pre
limit_all_gathers: false
special_tokens:
  pad_token: "[PAD]"
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"