# llygma-13b-lora

Training arguments:

```python
import transformers

# BATCH_SIZE, MICRO_BATCH_SIZE, and OUTPUT_DIR are defined below.
training_arguments = transformers.TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=BATCH_SIZE // MICRO_BATCH_SIZE,
    warmup_steps=100,
    max_steps=300,
    learning_rate=3e-4,
    fp16=True,
    logging_steps=10,
    optim="adamw_torch",
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps=50,
    save_steps=50,
    save_total_limit=3,
    load_best_model_at_end=True,
)
```
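The repo does not include the full training script, but arguments like these are typically handed to a `transformers.Trainer`. A minimal sketch; `model`, `tokenizer`, `train_data`, and `val_data` are placeholder names, not from this repo:

```python
import transformers

# Hypothetical wiring: model is the LoRA-wrapped LLaMA-13B model,
# train_data / val_data are the tokenized train and eval splits.
trainer = transformers.Trainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=val_data,
    data_collator=transformers.DataCollatorForLanguageModeling(
        tokenizer, mlm=False  # causal-LM objective, not masked-LM
    ),
)
trainer.train()
```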

LoRA hyperparameters:

```python
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0
LORA_TARGET_MODULES = [
    "q_proj",
    "v_proj",
]
```
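These constants map onto a `peft.LoraConfig` in the usual way. A minimal sketch, assuming the standard `peft` API; the `base_model` name is a placeholder, not from this repo:

```python
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=LORA_R,                            # rank of the low-rank update matrices
    lora_alpha=LORA_ALPHA,               # scaling factor, applied as alpha / r
    lora_dropout=LORA_DROPOUT,
    target_modules=LORA_TARGET_MODULES,  # attention query/value projections
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(base_model, lora_config)  # base_model: placeholder
```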

Run constants:

```python
BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE  # 128 // 4 = 32
LEARNING_RATE = 3e-4
TRAIN_STEPS = 300
OUTPUT_DIR = "llygmaV2-13B"
```
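With these settings, each optimizer step accumulates gradients over 32 micro-batches of 4 examples, for an effective batch size of 4 × 32 = 128.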