# base_model: huggyllama/llama-13b
# base_model_config: huggyllama/llama-13b
base_model: /workspace/llama-13b-alpaca-wizard-vicuna/
base_model_config: huggyllama/llama-13b
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
datasets:
  # - path: vicgalle/alpaca-gpt4
  #   type: alpaca
  # - path: anon8231489123/ShareGPT_Vicuna_unfiltered
  #   data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
  #   type: sharegpt
  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
    type: alpaca
dataset_prepared_path: data/last_run_prepared
val_set_size: 0.04
adapter:
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
  # - k_proj
  # - o_proj
lora_fan_in_fan_out: false
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: ./wizard-lm-out
batch_size: 128
micro_batch_size: 1
num_epochs: 2
warmup_steps: 117
logging_steps:
learning_rate: 0.000003
optimizer: adamw_torch
torchdistx_path:
lr_scheduler: one_cycle
log_sweep_min_lr: 2e-6
log_sweep_max_lr: 1e-4
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
gradient_checkpointing:
early_stopping_patience:
resume_from_checkpoint:
auto_resume_from_checkpoints:
local_rank:
load_4bit:
xformers_attention:
flash_attention: true
gptq_groupsize:
gptq_model_v1:
save_steps: 56
eval_steps: 14
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
fsdp_transformer_layer_cls_to_wrap:
fsdp_min_num_params: 2000
fsdp_backward_prefetch:
  - backward_pre
limit_all_gathers: false
special_tokens:
  pad_token: "[PAD]"
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
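
# Usage sketch (an assumption, not part of the original config): a config like this
# is typically passed to axolotl's finetune entry point, e.g.
#   accelerate launch scripts/finetune.py wizard-lm-13b.yml
# The config filename above is illustrative, and the script path may differ
# depending on the axolotl version in use.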