def print_trainable_params_percentage(model):
    """Print and return the percentage of trainable parameters in a model."""
    orig_param_size = sum(p.numel() for p in model.parameters())

    def count_parameters(model):
        # Count only parameters that will receive gradients.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    trainable_size = count_parameters(model)
    percentage = trainable_size / orig_param_size * 100
    print(f"Trainable param percentage: {percentage:.2f}% ({trainable_size}/{orig_param_size})")
    return percentage


def setup_for_distributed(is_master):
    """
    This function disables printing when not in the master process.
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        # Non-master processes stay silent unless print(..., force=True) is used.
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print
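
# Minimal usage sketch, kept as a guarded demo. Assumptions not taken from the
# original code: the toy nn.Sequential model, the choice of which layer to
# freeze, and treating rank 0 as the master process are illustrative only.
if __name__ == "__main__":
    import torch.nn as nn

    # Toy model: freeze the first layer, as one might when fine-tuning only a head.
    model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
    for p in model[0].parameters():
        p.requires_grad = False

    print_trainable_params_percentage(model)  # reports the trainable fraction

    # In a real torch.distributed run, the rank would come from
    # torch.distributed.get_rank(); here we pretend this process is rank 0.
    rank = 0
    setup_for_distributed(rank == 0)
    print("visible only on the master process")
    print("non-master processes can still force output", force=True)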