base_model: Neko-Institute-of-Science/LLaMA-30B-HF
base_model_config: Neko-Institute-of-Science/LLaMA-30B-HF
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: true
load_4bit:
datasets:
  - path: practicaldreamer/RPGPT_PublicDomain-ShareGPT
    data_files: RPGPT_PublicDomain_v3-sharegpt.json
    type: sharegpt
dataset_prepared_path: data/last_run_prepared
val_set_size: 0.0025
adapter: lora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 64
lora_alpha: 128
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
  # - k_proj
  # - o_proj
lora_fan_in_fan_out: false
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: output_dir
batch_size: 128
micro_batch_size: 4
eval_batch_size: 1
num_epochs: 1
warmup_steps: 50
logging_steps:
learning_rate: 0.00003
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
gradient_checkpointing: true
early_stopping_patience: 3
resume_from_checkpoint:
auto_resume_from_checkpoints:
local_rank:
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
save_steps: 4
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
fsdp_transformer_layer_cls_to_wrap:
fsdp_min_num_params: 2000
fsdp_backward_prefetch:
  - backward_pre
limit_all_gathers: false
special_tokens:
  pad_token: "[PAD]"
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
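
# Example launch (an assumption, not part of the original config): this file is written
# for the axolotl trainer, and the entry point depends on the installed version.
# The filename below is a placeholder for wherever you save this config.
#
#   accelerate launch -m axolotl.cli.train this-config.yml    # newer axolotl releases
#   accelerate launch scripts/finetune.py this-config.yml     # older releases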