# Training Configuration for Code Comment Quality Classifier
model:
  name: "distilbert-base-uncased"
  num_labels: 4
  max_length: 512
  dropout: 0.1  # Dropout probability for regularization

training:
  output_dir: "./results"
  num_train_epochs: 3
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 32
  gradient_accumulation_steps: 1  # Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
  learning_rate: 0.00002
  lr_scheduler_type: "cosine"  # Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup
  weight_decay: 0.01
  warmup_steps: 500
  warmup_ratio: null  # Alternative to warmup_steps (ratio of total training steps)
  logging_steps: 100
  eval_steps: 500
  save_steps: 1000
  save_total_limit: 3  # Maximum number of checkpoints to keep
  evaluation_strategy: "steps"
  save_strategy: "steps"
  load_best_model_at_end: true
  metric_for_best_model: "f1"
  greater_is_better: true
  early_stopping_patience: 3  # Number of evaluations without improvement before stopping
  early_stopping_threshold: 0.001  # Minimum improvement to reset the patience counter
  seed: 42
  fp16: false  # Mixed precision training (set to true if using a GPU with Tensor Cores)
  dataloader_num_workers: 4  # Number of workers for data loading
  dataloader_pin_memory: true  # Pin memory for faster GPU transfer
  remove_unused_columns: true
  report_to: ["none"]  # Options: "wandb", "tensorboard", "none", or a list
  # Class weights for handling imbalanced data (null = equal weights)
  class_weights: null  # Example: [1.0, 1.0, 1.2, 1.0] if the "unclear" class needs more weight

data:
  train_size: 0.8
  val_size: 0.1
  test_size: 0.1
  data_path: "./data/comments.csv"
  shuffle: true
  stratify: true  # Maintain class distribution in splits
  labels:
    - "excellent"
    - "helpful"
    - "unclear"
    - "outdated"

# Logging configuration
logging:
  level: "INFO"  # DEBUG, INFO, WARNING, ERROR
  log_file: "./results/training.log"
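
The keys under `training` mirror parameters of `transformers.TrainingArguments`, so a training script can pass most of them through directly. The sketch below is a minimal illustration under that assumption, using PyYAML to read the file; the early-stopping and class-weight keys are popped out first because they belong to an `EarlyStoppingCallback` or a custom loss rather than to `TrainingArguments`. The file path and variable names are illustrative, not part of this repository.

```python
# Sketch: read config.yaml and map the training section onto TrainingArguments.
# Assumptions: PyYAML and transformers are installed; names here are illustrative.
import yaml
from transformers import EarlyStoppingCallback, TrainingArguments

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

train_cfg = dict(cfg["training"])

# These keys are consumed elsewhere: early stopping by a Trainer callback,
# class weights by a custom loss, so they are removed before building the args.
patience = train_cfg.pop("early_stopping_patience", None)
threshold = train_cfg.pop("early_stopping_threshold", None)
class_weights = train_cfg.pop("class_weights", None)

# Drop null entries so TrainingArguments falls back to its own defaults
# (e.g. warmup_ratio: null). Note that recent transformers releases rename
# `evaluation_strategy` to `eval_strategy`.
args = TrainingArguments(**{k: v for k, v in train_cfg.items() if v is not None})

callbacks = []
if patience is not None:
    callbacks.append(
        EarlyStoppingCallback(
            early_stopping_patience=patience,
            early_stopping_threshold=threshold or 0.0,
        )
    )
```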