
Base models:

- Upstream base model: beomi-Llama-3-Open-Ko-8B-Instruct-preview
- Direct base model: hansoldeco-beomi-Llama-3-Open-Ko-8B-Instruct-preview (trained via Axolotl)

QDoRA training config (using `train.py` from the fsdp_qlora repo):

```bash
export CUDA_VISIBLE_DEVICES=0,1
python train.py \
  --train_type bnb_dora \
  --model_name sosoai/hansoldeco-beomi-Llama-3-Open-Ko-8B-Instruct-preview \
  --dataset orca_math \
  --dataset_samples 193789 \
  --batch_size 4 \
  --context_length 8192 \
  --gradient_accumulation_steps 2 \
  --sharding_strategy full_shard \
  --use_gradient_checkpointing true \
  --reentrant_checkpointing true \
  --use_cpu_offload false \
  --use_activation_cpu_offload false \
  --log_to wandb \
  --project_name "sosoai-fsdp-quantized-ft-exps" \
  --save_model true \
  --output_dir models/llama-8b-orca-math-10k-bnb-QDoRA
```
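With `--save_model true`, the script writes the trained weights under `--output_dir`. Below is a minimal inference sketch, assuming the saved QDoRA checkpoint has been merged/converted into a standard Hugging Face checkpoint (the path is taken from `--output_dir` above; conversion steps are documented in the fsdp_qlora repo):

```python
# Minimal inference sketch. Assumes the QDoRA checkpoint saved under
# --output_dir has been merged into a standard Hugging Face checkpoint;
# the path below is the --output_dir from the training command.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "models/llama-8b-orca-math-10k-bnb-QDoRA"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Korean math word problem, matching the fine-tuning data domain
prompt = "다음 수학 문제를 풀어 주세요: 12 + 34 = ?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```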

Datasets:

- hansoldeco in-house domain dataset (not public)
- kuotient/orca-math-word-problems-193k-korean
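For reference, the public dataset can be loaded with the `datasets` library. This is a sketch; the split and column names are assumptions and should be checked on the dataset card:

```python
# Sketch: load the public Korean Orca-Math dataset from the Hugging Face Hub.
# Split and column names are assumptions; verify them on the dataset card.
from datasets import load_dataset

ds = load_dataset("kuotient/orca-math-word-problems-193k-korean", split="train")
print(len(ds))  # should be close to the 193,789 samples used in training
print(ds[0])    # inspect one question/answer pair
```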