# Training Configuration for Code Comment Quality Classifier
model:
  name: "distilbert-base-uncased"
  num_labels: 4
  max_length: 512
  dropout: 0.1  # Dropout probability for regularization

training:
  output_dir: "./results"
  num_train_epochs: 3
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 32
  gradient_accumulation_steps: 1  # Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
  learning_rate: 0.00002
  lr_scheduler_type: "cosine"  # Options: linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup
  weight_decay: 0.01
  warmup_steps: 500
  warmup_ratio: null  # Alternative to warmup_steps (ratio of total training steps)
  logging_steps: 100
  eval_steps: 500
  save_steps: 1000
  save_total_limit: 3  # Maximum number of checkpoints to keep
  evaluation_strategy: "steps"
  save_strategy: "steps"
  load_best_model_at_end: true
  metric_for_best_model: "f1"
  greater_is_better: true
  early_stopping_patience: 3  # Number of evaluations without improvement before stopping
  early_stopping_threshold: 0.001  # Minimum improvement to reset the patience counter
  seed: 42
  fp16: false  # Mixed precision training (set to true if using a GPU with Tensor Cores)
  dataloader_num_workers: 4  # Number of workers for data loading
  dataloader_pin_memory: true  # Pin memory for faster GPU transfer
  remove_unused_columns: true
  report_to: ["none"]  # Options: "wandb", "tensorboard", "none", or a list
  # Class weights for handling imbalanced data (null = equal weights)
  class_weights: null  # Example: [1.0, 1.0, 1.2, 1.0] if the "unclear" class needs more weight

data:
  train_size: 0.8
  val_size: 0.1
  test_size: 0.1
  data_path: "./data/comments.csv"
  shuffle: true
  stratify: true  # Maintain class distribution in splits
  labels:
    - "excellent"
    - "helpful"
    - "unclear"
    - "outdated"

# Logging configuration
logging:
  level: "INFO"  # DEBUG, INFO, WARNING, ERROR
  log_file: "./results/training.log"
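
The keys under `training` mirror parameters of `transformers.TrainingArguments`, so a training script can pass most of them through directly. The sketch below is a minimal illustration under that assumption, using PyYAML to read the file; the early-stopping and class-weight keys are popped out first because they belong to an `EarlyStoppingCallback` or a custom loss rather than to `TrainingArguments`. The file path and variable names are illustrative, not part of this repository.

```python
# Sketch: read config.yaml and map the training section onto TrainingArguments.
# Assumptions: PyYAML and transformers are installed; names here are illustrative.
import yaml
from transformers import EarlyStoppingCallback, TrainingArguments

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

train_cfg = dict(cfg["training"])

# These keys are consumed elsewhere: early stopping by a Trainer callback,
# class weights by a custom loss, so they are removed before building the args.
patience = train_cfg.pop("early_stopping_patience", None)
threshold = train_cfg.pop("early_stopping_threshold", None)
class_weights = train_cfg.pop("class_weights", None)

# Drop null entries so TrainingArguments falls back to its own defaults
# (e.g. warmup_ratio: null). Note that recent transformers releases rename
# `evaluation_strategy` to `eval_strategy`.
args = TrainingArguments(**{k: v for k, v in train_cfg.items() if v is not None})

callbacks = []
if patience is not None:
    callbacks.append(
        EarlyStoppingCallback(
            early_stopping_patience=patience,
            early_stopping_threshold=threshold or 0.0,
        )
    )
```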