#!/bin/bash
# SFT LoRA training launcher for FunctionGemma.
set -euo pipefail

# Pin the run to a single GPU and silence the tokenizers fork warning.
export CUDA_VISIBLE_DEVICES=0
export TOKENIZERS_PARALLELISM=false
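
# Multi-GPU variant (a sketch, not verified against this repo): if src.train
# supports torchrun-style distributed launch, the invocation below could be
# replaced with something like:
#   export CUDA_VISIBLE_DEVICES=0,1
#   torchrun --nproc_per_node 2 -m src.train ...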

# Base model checkpoint to fine-tune.
MODEL_PATH="/path/to/your/functiongemma-270m-it"

# Training data (JSON).
DATASET_PATH="./data/training_data.json"

# All runs are written under this directory.
OUTPUT_DIR="./runs"

# Timestamped run name so repeated launches don't overwrite each other.
RUN_NAME="functiongemma-lora-$(date +%Y%m%d_%H%M%S)"
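
# Fail fast if the configured paths don't exist (a minimal guard; assumes
# MODEL_PATH is a directory and DATASET_PATH a single file).
if [ ! -d "$MODEL_PATH" ]; then
    echo "ERROR: model path not found: $MODEL_PATH" >&2
    exit 1
fi
if [ ! -f "$DATASET_PATH" ]; then
    echo "ERROR: dataset file not found: $DATASET_PATH" >&2
    exit 1
fi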
| | echo "========================================" |
| | echo "FunctionGemma SFT LoRA training" |
| | echo "========================================" |
| | echo "Model: $MODEL_PATH" |
| | echo "Dataset: $DATASET_PATH" |
| | echo "Output: $OUTPUT_DIR/$RUN_NAME" |
| | echo "========================================" |
| |

# Launch LoRA SFT. per_device_train_batch_size 4 with
# gradient_accumulation_steps 4 gives an effective batch size of 16;
# bf16 and gradient checkpointing keep memory usage down on a single GPU.
python -m src.train \
    --model_path "$MODEL_PATH" \
    --dataset_path "$DATASET_PATH" \
    --output_dir "$OUTPUT_DIR" \
    --run_name "$RUN_NAME" \
    --lora_r 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.1 \
    --max_seq_length 2048 \
    --bf16 \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --gradient_checkpointing
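
# While training runs, loss curves can be followed with TensorBoard,
# assuming src.train reports to TensorBoard (e.g. the Hugging Face Trainer
# with report_to="tensorboard"; an assumption about the repo's logging setup):
#   tensorboard --logdir "$OUTPUT_DIR" --port 6006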
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | echo "========================================" |
| | echo "Training finished!" |
| | echo "Model saved to: $OUTPUT_DIR/$RUN_NAME" |
| | echo "========================================" |
| |
|
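
# Optional opt-in post-step: merge the LoRA adapter into the base model for
# standalone use. A sketch that assumes src.train saves a PEFT adapter
# directly under $OUTPUT_DIR/$RUN_NAME (adjust the path if checkpoints land
# in a subdirectory). MERGE_ADAPTER is a hypothetical toggle, not part of
# the repo; enable with MERGE_ADAPTER=1.
if [ "${MERGE_ADAPTER:-0}" = "1" ]; then
    python - "$MODEL_PATH" "$OUTPUT_DIR/$RUN_NAME" <<'PY'
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_path, adapter_path = sys.argv[1], sys.argv[2]
base = AutoModelForCausalLM.from_pretrained(base_path)
model = PeftModel.from_pretrained(base, adapter_path)
merged = model.merge_and_unload()  # fold the LoRA deltas into the base weights
merged.save_pretrained(adapter_path + "-merged")
AutoTokenizer.from_pretrained(base_path).save_pretrained(adapter_path + "-merged")
PY
fi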