ehartford committed on
Commit
8f8ee79
1 Parent(s): 9a23e20

Delete path_in_repo=/configs/dolphin-dpo.yml

path_in_repo=/configs/dolphin-dpo.yml DELETED
@@ -1,82 +0,0 @@
- base_model: cognitivecomputations/dolphin-2.6-mistral-7b
- model_type: MistralForCausalLM
- tokenizer_type: LlamaTokenizer
- is_mistral_derived_model: true
-
- load_in_8bit: false
- load_in_4bit: true
- strict: false
-
- rl: true
- datasets:
-   - path: argilla/ultrafeedback-binarized-preferences-cleaned
-     split: train
-     type: ultra_apply_chatml
-   - path: unalignment/toxic-dpo-v0.1
-     split: train
-     type: toxic_apply_chatml
-
- dataset_prepared_path: last_run_prepared
- val_set_size: 0.0
- output_dir: /workspace/dolphin-2.6-mistral-7b-dpo
-
- adapter: qlora
- lora_model_dir:
-
- sequence_len: 2048
- sample_packing: false
- pad_to_sequence_len: false
-
- lora_r: 64
- lora_alpha: 32
- lora_dropout: 0.05
- lora_target_linear: true
- lora_fan_in_fan_out:
- lora_target_modules:
-   - gate_proj
-   - down_proj
-   - up_proj
-   - q_proj
-   - v_proj
-   - k_proj
-   - o_proj
-
- wandb_project: dolphin
- wandb_entity:
- wandb_watch:
- wandb_run_id:
- wandb_log_model:
-
- gradient_accumulation_steps: 4
- micro_batch_size: 4
- num_epochs: 1
- optimizer: paged_adamw_8bit
- lr_scheduler: cosine
- learning_rate: 2e-5
-
- train_on_inputs: false
- group_by_length: false
- bf16: true
- fp16: false
- tf32: true
-
- gradient_checkpointing: true
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 1
- xformers_attention:
- flash_attention: true
-
- warmup_steps: 10
- eval_steps:
- eval_table_size:
- eval_table_max_new_tokens: 128
- save_steps: 239
- debug:
- deepspeed:
- weight_decay: 0.0
- fsdp:
- fsdp_config:
- special_tokens:
- save_safetensors: true
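
Note: the removed file follows axolotl's training-config schema (Mistral base model, QLoRA adapter, rl: true with two preference datasets for DPO-style tuning). As a sketch only, assuming the file sat at configs/dolphin-dpo.yml in a checkout with axolotl and accelerate installed, a run would typically be launched with:

    accelerate launch -m axolotl.cli.train configs/dolphin-dpo.yml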