kclauw committed on
Commit
0fbfc34
·
1 Parent(s): 4ab2875

Upload folder using huggingface_hub

Browse files
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/checkpoints/checkpoints.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001efb9d382e2850237c54326bb0514a2a2960a668f4a972840262b899604342
3
+ size 130
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ runs_folder: /media/god/Second/Runs
3
+ train_type: batch
4
+ huggingface:
5
+ repo_id: kclauw/grokking-oinformation
6
+ synergymask: false
7
+ sparsity_sampling: 10
8
+ scheduler: null
9
+ device: cpu
10
+ test_seed: 123
11
+ evaluate_oinformation: true
12
+ evaluate_sparsity: false
13
+ evaluate_norms: false
14
+ evaluate_subnetworks: false
15
+ evaluate_lottery_ticket: false
16
+ create_grid_search_file: false
17
+ create_plots: true
18
+ store_local: true
19
+ experiment_type: train_single
20
+ unique_plot_path: fcn_tanh/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0
21
+ store_on_scratch: true
22
+ oinformation:
23
+ sampling_rate: 1
24
+ max_batch_greedy: 40
25
+ layer: fc2_post
26
+ njobs: 16
27
+ layers:
28
+ - fc3_post
29
+ train: true
30
+ features_type:
31
+ - train
32
+ max_batch_exhaustive: 3
33
+ loss:
34
+ _target_: model.neural_network.MyHingeLoss
35
+ train:
36
+ lr: 0.028999999999999998
37
+ train_batch_size: 32
38
+ num_workers: 6
39
+ eval_batch_size: 32
40
+ max_epochs: 1400
41
+ max_steps: 1000000
42
+ regularization:
43
+ weight_decay:
44
+ name: wd
45
+ value: 0.0
46
+ type: null
47
+ dropout:
48
+ name: do
49
+ value: 0
50
+ type: dropout
51
+ batchnorm:
52
+ name: bn
53
+ value: false
54
+ type: batchnorm
55
+ oinfo_syn:
56
+ name: syn
57
+ value: 0
58
+ type: null
59
+ layernorm:
60
+ name: ln
61
+ value: false
62
+ type: layernorm
63
+ oinfo_red:
64
+ name: red
65
+ value: 0
66
+ wandb:
67
+ enabled: false
68
+ project: grokking_replica
69
+ unique_filename: parity_n_40_k_3_N_766_100#fcn_tanh_20_20_20_0.00_default#sgd_bs_32_lr_0.03#wd_0.00_do_0_no_bn_syn_0_no_ln_red_0
70
+ regularization_name: null
71
+ dataset:
72
+ train_samples: 766
73
+ test_samples: 100
74
+ parameters:
75
+ _target_: dataloaders.sparse_parity.parity
76
+ 'n': 40
77
+ k: 3
78
+ name: parity_n_${dataset.parameters.n}_k_${dataset.parameters.k}_N_${dataset.train_samples}_${dataset.test_samples}
79
+ model:
80
+ parameters:
81
+ _target_: src.model.neural_network.DynamicFFN
82
+ activation: tanh
83
+ initialization: default
84
+ layers:
85
+ fc1: 20
86
+ fc2: 20
87
+ fc3: 20
88
+ initialization_scale: 0.0
89
+ constrained_norm: false
90
+ name: fcn_${model.parameters.activation}
91
+ layer_names:
92
+ - fc1
93
+ - fc2
94
+ - fc3
95
+ optimizer:
96
+ name: sgd
97
+ parameters:
98
+ _target_: torch.optim.SGD
99
+ weight_decay: ${regularization.weight_decay.value}
100
+ paths:
101
+ root_dir: .
102
+ data_dir: ${paths.root_dir}/data/
103
+ log_dir: ${paths.root_dir}/runs/
104
+ output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0
105
+ plot_dir: ./plots/1_baseline_datasize_vs_lr_small_100_neurons_longer_activation
106
+ plot_dir_all: ./plots/1_baseline_datasize_vs_lr_small_100_neurons_longer_activation/all/fcn_tanh/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0
107
+ work_dir: ${hydra:runtime.cwd}
108
+ experiment_name: 1_baseline_datasize_vs_lr_small_100_neurons_longer_activation
109
+ seeds:
110
+ - 0
111
+ - 1
112
+ - 2
113
+ - 3
114
+ - 97
115
+ evaluate_generalizing_models_only: false
116
+ remove_files: true
117
+ plots:
118
+ experiment1:
119
+ title:
120
+ - train.max_epochs
121
+ type: heatmap
122
+ x: dataset.train_samples
123
+ x_label: dataset size
124
+ y_label: learning rate
125
+ 'y':
126
+ - test_acc
127
+ - synergy
128
+ - redundancy
129
+ grid_search:
130
+ model.parameters.activation:
131
+ array:
132
+ - hardswish
133
+ - relu
134
+ - elu
135
+ - prelu
136
+ - selu
137
+ - gelu
138
+ - celu
139
+ - hardshrink
140
+ - leaky
141
+ - rrelu
142
+ - tanhshrink
143
+ - hardsigmoid
144
+ - hardtanh
145
+ - tanh
146
+ - sigmoid
147
+ - softmax
148
+ train.lr:
149
+ min: 0.01
150
+ max: 0.2
151
+ num: 11
152
+ type: uniform
153
+ dataset.train_samples:
154
+ array:
155
+ - 766
156
+ - 877
157
+ - 988
158
+ - 1100
159
+ experiment_oinfo_title: null
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/epoch_results_1400_seed_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecbb3387759453910a52089ebd3240d8e976e642f6fbd92b3d7be60e633db95d
3
+ size 124114
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/oinformation_results_train_1400.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7227625abc8492146cf8f7b0044c8b9c8fad26c20cc57db765f4e29096d707ab
3
+ size 6659004
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/optimal_oinformation_results_train_1400.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c453f4c97f63a5dee119d89b80a2c9ae459f878313a2229492aa1b06831d3067
3
+ size 707885
parity_n_40_k_3_N_766_100/fcn_tanh_20_20_20_0.00_default/sgd_bs_32_lr_0.03/wd_0.00_do_0_no_bn_syn_0_no_ln_red_0/seed_0/optimal_results_seed_1400_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e18f4dd1addb38b119023fb0a7edcd87538be8031a578b2e898d73dd9caa3da
3
+ size 39