import optuna
import os
from datetime import datetime
import pandas as pd
from pathlib import Path
import json
import math
import sys
import traceback

# Make the repository root importable so the script can be run directly.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

# Local imports: train_and_evaluate(config) returns (val_accuracy, vis_dir);
# create_run_directory(prefix, parent_dir) creates and returns a fresh run directory.
from scripts.train import train_and_evaluate
from src.utils.utils import create_run_directory

def create_hyperparam_directory():
    """Create a parent directory for all hyperparameter searches."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_dir = "runs_hyperparam"
    hyperparam_dir = os.path.join(base_dir, f"hyperparam_{timestamp}")
    os.makedirs(hyperparam_dir, exist_ok=True)
    return hyperparam_dir

def objective(trial, hyperparam_run_dir, data_path):
    """Optuna objective for a single dataset: train one sampled configuration and return its validation accuracy."""
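    # Hyperparameters sampled by Optuna; learning rate and weight decay are
    # drawn on a log scale so the search spans several orders of magnitude.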
    config = {
        "clip_model": trial.suggest_categorical(
            "clip_model",
            ["openai/clip-vit-base-patch32", "openai/clip-vit-large-patch14"],
        ),
        "batch_size": trial.suggest_categorical("batch_size", [8, 16, 32]),
        "unfreeze_layers": trial.suggest_int("unfreeze_layers", 1, 4),
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-1, log=True),
        "gradient_clip_max_norm": trial.suggest_float("gradient_clip_max_norm", 0.1, 1.0),
        "augmentation_strength": trial.suggest_float("augmentation_strength", 0.0, 1.0),
        "crop_scale_min": trial.suggest_float("crop_scale_min", 0.6, 0.9),
        "max_frames": trial.suggest_int("max_frames", 5, 15),
        "sigma": trial.suggest_float("sigma", 0.1, 0.5),
    }

    # Use only the first three classes; "baby_mill" is excluded from this search.
    class_labels = ["windmill", "halo", "swipe", "baby_mill"][:3]

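    # Fixed settings shared by every trial; only the values above are searched.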
    config.update({
        "class_labels": class_labels,
        "num_classes": len(class_labels),
        "data_path": data_path,
        "num_epochs": 50,
        "patience": 10,
        "image_size": 224,
        "crop_scale_max": 1.0,
        "normalization_mean": [0.485, 0.456, 0.406],
        "normalization_std": [0.229, 0.224, 0.225],
        "overfitting_threshold": 10,
    })

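    # Derive every augmentation knob from the single searched strength value,
    # so one Optuna parameter scales all of them together.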
    config.update({
        "flip_probability": 0.5 * config["augmentation_strength"],
        "rotation_degrees": int(15 * config["augmentation_strength"]),
        "brightness_jitter": 0.2 * config["augmentation_strength"],
        "contrast_jitter": 0.2 * config["augmentation_strength"],
        "saturation_jitter": 0.2 * config["augmentation_strength"],
        "hue_jitter": 0.1 * config["augmentation_strength"],
    })

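    # Label the trial with the last two path components (e.g. "bryant_random")
    # and give it its own run directory under this dataset's search directory.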
    dataset_label = '_'.join(Path(data_path).parts[-2:])
    trial_dir = create_run_directory(
        prefix=f"trial_{dataset_label}",
        parent_dir=hyperparam_run_dir,
    )
    config["run_dir"] = trial_dir

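    # Train inside a try/except so a crashed or diverged run is logged and
    # reported as -inf instead of aborting the whole study.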
    try:
        val_accuracy, vis_dir = train_and_evaluate(config)

        # Treat NaN/inf accuracies from diverged runs as failures.
        if val_accuracy is None or math.isnan(val_accuracy) or math.isinf(val_accuracy):
            raise ValueError(f"Invalid accuracy value: {val_accuracy}")

        # Persist everything needed to reproduce and inspect this trial.
        trial_info = {
            'dataset': data_path,
            'dataset_label': dataset_label,
            'trial_number': trial.number,
            'parameters': trial.params,
            'accuracy': val_accuracy,
            'visualization_dir': vis_dir,
            'trial_dir': trial_dir,
        }

        with open(os.path.join(trial_dir, 'trial_info.json'), 'w') as f:
            json.dump(trial_info, f, indent=4)

        return val_accuracy

    except Exception as e:
        print(f"Error in trial for {data_path}: {e}")

        # Append full details to a shared error log for the whole search.
        error_log_path = os.path.join(hyperparam_run_dir, 'error_log.txt')
        with open(error_log_path, 'a') as f:
            f.write(f"\nError in trial at {datetime.now()}:\n")
            f.write(f"Dataset: {data_path}\n")
            f.write(f"Error: {e}\n")
            f.write(f"Trial params: {trial.params}\n")
            f.write("Stack trace:\n")
            f.write(traceback.format_exc())
            f.write("\n" + "=" * 50 + "\n")

        # Sentinel the caller checks for; keeps the study running after failures.
        return float('-inf')

def run_hyperparameter_search(data_paths, n_trials=100):
    """Run an independent hyperparameter search for each dataset."""

    parent_hyperparam_dir = create_hyperparam_directory()

    all_results = {}

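    # One Optuna study per dataset, each with its own output directory.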
    for data_path in data_paths:
        print(f"\nStarting hyperparameter search for dataset: {data_path}")

        dataset_label = '_'.join(Path(data_path).parts[-2:])
        dataset_dir = os.path.join(parent_hyperparam_dir, f"search_{dataset_label}")
        os.makedirs(dataset_dir, exist_ok=True)

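        # Keep sampling until n_trials succeed, but cap total attempts at
        # twice that so a persistently failing setup cannot loop forever.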
        study = optuna.create_study(direction="maximize")
        completed_trials = 0
        failed_trials = []
        total_attempts = 0
        max_attempts = n_trials * 2
        while completed_trials < n_trials and total_attempts < max_attempts:
            try:
                total_attempts += 1
                study.optimize(
                    lambda trial: objective(trial, dataset_dir, data_path),
                    n_trials=1
                )

                # objective() returns -inf as its failure sentinel, so any
                # other value means the trial completed.
                if study.trials[-1].value != float('-inf'):
                    completed_trials += 1
                    print(f"Completed trial {completed_trials}/{n_trials} for {dataset_label}")
                else:
                    error_info = {
                        'trial_number': completed_trials + len(failed_trials) + 1,
                        'error': "Trial returned -inf",
                        'timestamp': datetime.now().isoformat()
                    }
                    failed_trials.append(error_info)
                    print(f"Failed trial for {dataset_label}: returned -inf")

            except Exception as e:
                error_info = {
                    'trial_number': completed_trials + len(failed_trials) + 1,
                    'error': str(e),
                    'timestamp': datetime.now().isoformat()
                }
                failed_trials.append(error_info)
                print(f"Error in trial for {dataset_label}: {e}")

        # Record failures once the loop is done, and flag a truncated search.
        with open(os.path.join(dataset_dir, 'failed_trials.json'), 'w') as f:
            json.dump(failed_trials, f, indent=4)
        if total_attempts >= max_attempts:
            print(f"Warning: Reached maximum attempts ({max_attempts}) for {dataset_label}")

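        # Dump the full Optuna results table for offline analysis.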
        results_df = study.trials_dataframe()
        results_df.to_csv(os.path.join(dataset_dir, 'study_results.csv'))

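        # Summarize how many attempts succeeded vs. failed.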
        trial_stats = {
            'completed_trials': completed_trials,
            'failed_trials': len(failed_trials),
            'total_attempts': completed_trials + len(failed_trials)
        }
        with open(os.path.join(dataset_dir, 'trial_statistics.json'), 'w') as f:
            json.dump(trial_stats, f, indent=4)

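        # study.best_trial is the highest-value trial; failed trials scored
        # -inf, so they can never be selected as best.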
        best_trial = study.best_trial
        best_params_path = os.path.join(dataset_dir, 'best_params.txt')
        with open(best_params_path, 'w') as f:
            f.write(f"Best trial value: {best_trial.value}\n\n")
            f.write("Best parameters:\n")
            for key, value in best_trial.params.items():
                f.write(f"{key}: {value}\n")

        all_results[data_path] = {
            'best_value': best_trial.value,
            'best_params': best_trial.params,
            'study': study,
            'results_df': results_df,
            'failed_trials': failed_trials,
            'trial_stats': trial_stats
        }

        print(f"\nResults for {data_path}:")
        print(f"Completed trials: {completed_trials}")
        print(f"Failed trials: {len(failed_trials)}")
        print(f"Best trial value: {best_trial.value}")
        print("Best parameters:")
        for key, value in best_trial.params.items():
            print(f"  {key}: {value}")

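    # Cross-dataset summary: one row per dataset with its best accuracy and
    # the parameters that produced it.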
    summary_data = []
    for data_path, result in all_results.items():
        summary_data.append({
            'dataset': data_path,
            'best_accuracy': result['best_value'],
            'completed_trials': result['trial_stats']['completed_trials'],
            'failed_trials': result['trial_stats']['failed_trials'],
            **result['best_params']
        })

    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(os.path.join(parent_hyperparam_dir, 'overall_summary.csv'), index=False)

    return parent_hyperparam_dir, all_results

if __name__ == "__main__":
    data_paths = [
        './data/blog/datasets/bryant/random',
        './data/blog/datasets/bryant/adjusted',
        './data/blog/datasets/youtube/random',
        './data/blog/datasets/youtube/adjusted',
        './data/blog/datasets/combined/random',
        './data/blog/datasets/combined/adjusted',
        './data/blog/datasets/bryant_train_youtube_val/default'
    ]

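    # 8 trials per dataset keeps this run short; raise n_trials (default 100)
    # for a more thorough search.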
    hyperparam_dir, results = run_hyperparameter_search(
        data_paths,
        n_trials=8
    )

    print("\nHyperparameter search complete!")
    print(f"Results are saved in: {hyperparam_dir}")