kclauw committed on
Commit
be656d3
·
1 Parent(s): 8aa731a

Upload folder using huggingface_hub

Browse files
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_0/checkpoints/checkpoints_1400.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e174d3901f5508911e129b07501fb42e472ab95801d3bec9b3ccda0f5e0d2c26
3
+ size 23670534
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_0/config.yaml CHANGED
@@ -6,7 +6,7 @@ huggingface:
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
- device: cpu
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
@@ -25,11 +25,11 @@ oinformation:
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
- - fc1_post
29
  train: true
30
  features_type:
31
  - train
32
- max_batch_exhaustive: 3
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
@@ -37,7 +37,7 @@ train:
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
- max_epochs: 900
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
@@ -98,10 +98,11 @@ paths:
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
- plot_dir: ./plots/gridsearch
102
- plot_dir_all: ./plots/gridsearch/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
 
103
  work_dir: ${hydra:runtime.cwd}
104
- experiment_name: gridsearch
105
  seeds:
106
  - 0
107
  - 1
@@ -110,45 +111,36 @@ seeds:
110
  - 97
111
  evaluate_generalizing_models_only: false
112
  remove_files: true
113
- train_seeds_parallel: true
114
- plots:
115
- experiment1:
116
- title:
117
- - train.max_epochs
118
- type: heatmap
119
- x: dataset.train_samples
120
- x_label: dataset size
121
- y_label: learning rate
122
- 'y':
123
- - test_acc
124
- - synergy
125
- - redundancy
126
  grid_search:
127
  dataset.train_samples:
128
  array:
 
 
129
  - 800
 
130
  - 1000
131
- - 1200
132
- - 1500
133
- - 2000
134
- - 3000
135
  train.lr:
136
  array:
 
137
  - 0.1
 
138
  train.train_batch_size:
139
  array:
140
  - 32
 
 
 
141
  dataset.parameters.n:
142
  array:
143
- - 30
144
  - 40
145
- model.parameters.layers.fc1:
146
- array:
147
- - 10
148
- - 12
149
- - 14
150
- - 16
151
- - 18
152
- - 20
153
- - 100
154
  experiment_oinfo_title: null
 
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
+ device: cuda
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
 
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
+ - fc3_post
29
  train: true
30
  features_type:
31
  - train
32
+ max_batch_exhaustive: 10
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
 
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
+ max_epochs: 1400
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
 
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
+ plot_dir: ./plots/gridsearch_100
102
+ plot_dir_all: ./plots/gridsearch_100/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ run_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs
104
  work_dir: ${hydra:runtime.cwd}
105
+ experiment_name: gridsearch_100
106
  seeds:
107
  - 0
108
  - 1
 
111
  - 97
112
  evaluate_generalizing_models_only: false
113
  remove_files: true
114
+ train_seeds_parallel: false
 
 
 
 
 
 
 
 
 
 
 
 
115
  grid_search:
116
  dataset.train_samples:
117
  array:
118
+ - 500
119
+ - 600
120
  - 800
121
+ - 900
122
  - 1000
123
+ - 1100
124
+ name: train_samples
 
 
125
  train.lr:
126
  array:
127
+ - 0.05
128
  - 0.1
129
+ name: lr
130
  train.train_batch_size:
131
  array:
132
  - 32
133
+ - 64
134
+ - 128
135
+ name: train_bs
136
  dataset.parameters.n:
137
  array:
 
138
  - 40
139
+ - 50
140
+ name: 'n'
141
+ plots:
142
+ training_heatmaps:
143
+ plot_1:
144
+ - dataset.train_samples
145
+ - train.train_batch_size
 
 
146
  experiment_oinfo_title: null
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_0/epoch_results_1400_seed_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1faf94efe8e1ac182cdd6e61fed6088c9e5b501815e27299bc6be37e037c2dee
3
+ size 101686
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_0/optimal_results_seed_1400_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8498fd021a0ed5b5716b682b2575834da0cd3bb2366ff29bf3baceb24fb171c1
3
+ size 98
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_1/config.yaml CHANGED
@@ -6,7 +6,7 @@ huggingface:
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
- device: cpu
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
@@ -25,11 +25,11 @@ oinformation:
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
- - fc1_post
29
  train: true
30
  features_type:
31
  - train
32
- max_batch_exhaustive: 3
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
@@ -37,7 +37,7 @@ train:
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
- max_epochs: 900
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
@@ -98,10 +98,11 @@ paths:
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
- plot_dir: ./plots/gridsearch
102
- plot_dir_all: ./plots/gridsearch/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
 
103
  work_dir: ${hydra:runtime.cwd}
104
- experiment_name: gridsearch
105
  seeds:
106
  - 0
107
  - 1
@@ -110,45 +111,36 @@ seeds:
110
  - 97
111
  evaluate_generalizing_models_only: false
112
  remove_files: true
113
- train_seeds_parallel: true
114
- plots:
115
- experiment1:
116
- title:
117
- - train.max_epochs
118
- type: heatmap
119
- x: dataset.train_samples
120
- x_label: dataset size
121
- y_label: learning rate
122
- 'y':
123
- - test_acc
124
- - synergy
125
- - redundancy
126
  grid_search:
127
  dataset.train_samples:
128
  array:
 
 
129
  - 800
 
130
  - 1000
131
- - 1200
132
- - 1500
133
- - 2000
134
- - 3000
135
  train.lr:
136
  array:
 
137
  - 0.1
 
138
  train.train_batch_size:
139
  array:
140
  - 32
 
 
 
141
  dataset.parameters.n:
142
  array:
143
- - 30
144
  - 40
145
- model.parameters.layers.fc1:
146
- array:
147
- - 10
148
- - 12
149
- - 14
150
- - 16
151
- - 18
152
- - 20
153
- - 100
154
  experiment_oinfo_title: null
 
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
+ device: cuda
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
 
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
+ - fc3_post
29
  train: true
30
  features_type:
31
  - train
32
+ max_batch_exhaustive: 10
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
 
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
+ max_epochs: 1400
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
 
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
+ plot_dir: ./plots/gridsearch_100
102
+ plot_dir_all: ./plots/gridsearch_100/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ run_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs
104
  work_dir: ${hydra:runtime.cwd}
105
+ experiment_name: gridsearch_100
106
  seeds:
107
  - 0
108
  - 1
 
111
  - 97
112
  evaluate_generalizing_models_only: false
113
  remove_files: true
114
+ train_seeds_parallel: false
 
 
 
 
 
 
 
 
 
 
 
 
115
  grid_search:
116
  dataset.train_samples:
117
  array:
118
+ - 500
119
+ - 600
120
  - 800
121
+ - 900
122
  - 1000
123
+ - 1100
124
+ name: train_samples
 
 
125
  train.lr:
126
  array:
127
+ - 0.05
128
  - 0.1
129
+ name: lr
130
  train.train_batch_size:
131
  array:
132
  - 32
133
+ - 64
134
+ - 128
135
+ name: train_bs
136
  dataset.parameters.n:
137
  array:
 
138
  - 40
139
+ - 50
140
+ name: 'n'
141
+ plots:
142
+ training_heatmaps:
143
+ plot_1:
144
+ - dataset.train_samples
145
+ - train.train_batch_size
 
 
146
  experiment_oinfo_title: null
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_2/config.yaml CHANGED
@@ -6,7 +6,7 @@ huggingface:
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
- device: cpu
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
@@ -25,11 +25,11 @@ oinformation:
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
- - fc1_post
29
  train: true
30
  features_type:
31
  - train
32
- max_batch_exhaustive: 3
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
@@ -37,7 +37,7 @@ train:
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
- max_epochs: 900
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
@@ -98,10 +98,11 @@ paths:
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
- plot_dir: ./plots/gridsearch
102
- plot_dir_all: ./plots/gridsearch/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
 
103
  work_dir: ${hydra:runtime.cwd}
104
- experiment_name: gridsearch
105
  seeds:
106
  - 0
107
  - 1
@@ -110,45 +111,36 @@ seeds:
110
  - 97
111
  evaluate_generalizing_models_only: false
112
  remove_files: true
113
- train_seeds_parallel: true
114
- plots:
115
- experiment1:
116
- title:
117
- - train.max_epochs
118
- type: heatmap
119
- x: dataset.train_samples
120
- x_label: dataset size
121
- y_label: learning rate
122
- 'y':
123
- - test_acc
124
- - synergy
125
- - redundancy
126
  grid_search:
127
  dataset.train_samples:
128
  array:
 
 
129
  - 800
 
130
  - 1000
131
- - 1200
132
- - 1500
133
- - 2000
134
- - 3000
135
  train.lr:
136
  array:
 
137
  - 0.1
 
138
  train.train_batch_size:
139
  array:
140
  - 32
 
 
 
141
  dataset.parameters.n:
142
  array:
143
- - 30
144
  - 40
145
- model.parameters.layers.fc1:
146
- array:
147
- - 10
148
- - 12
149
- - 14
150
- - 16
151
- - 18
152
- - 20
153
- - 100
154
  experiment_oinfo_title: null
 
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
+ device: cuda
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
 
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
+ - fc3_post
29
  train: true
30
  features_type:
31
  - train
32
+ max_batch_exhaustive: 10
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
 
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
+ max_epochs: 1400
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
 
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
+ plot_dir: ./plots/gridsearch_100
102
+ plot_dir_all: ./plots/gridsearch_100/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ run_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs
104
  work_dir: ${hydra:runtime.cwd}
105
+ experiment_name: gridsearch_100
106
  seeds:
107
  - 0
108
  - 1
 
111
  - 97
112
  evaluate_generalizing_models_only: false
113
  remove_files: true
114
+ train_seeds_parallel: false
 
 
 
 
 
 
 
 
 
 
 
 
115
  grid_search:
116
  dataset.train_samples:
117
  array:
118
+ - 500
119
+ - 600
120
  - 800
121
+ - 900
122
  - 1000
123
+ - 1100
124
+ name: train_samples
 
 
125
  train.lr:
126
  array:
127
+ - 0.05
128
  - 0.1
129
+ name: lr
130
  train.train_batch_size:
131
  array:
132
  - 32
133
+ - 64
134
+ - 128
135
+ name: train_bs
136
  dataset.parameters.n:
137
  array:
 
138
  - 40
139
+ - 50
140
+ name: 'n'
141
+ plots:
142
+ training_heatmaps:
143
+ plot_1:
144
+ - dataset.train_samples
145
+ - train.train_batch_size
 
 
146
  experiment_oinfo_title: null
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_3/config.yaml CHANGED
@@ -6,7 +6,7 @@ huggingface:
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
- device: cpu
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
@@ -25,11 +25,11 @@ oinformation:
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
- - fc1_post
29
  train: true
30
  features_type:
31
  - train
32
- max_batch_exhaustive: 3
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
@@ -37,7 +37,7 @@ train:
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
- max_epochs: 900
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
@@ -98,10 +98,11 @@ paths:
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
- plot_dir: ./plots/gridsearch
102
- plot_dir_all: ./plots/gridsearch/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
 
103
  work_dir: ${hydra:runtime.cwd}
104
- experiment_name: gridsearch
105
  seeds:
106
  - 0
107
  - 1
@@ -110,45 +111,36 @@ seeds:
110
  - 97
111
  evaluate_generalizing_models_only: false
112
  remove_files: true
113
- train_seeds_parallel: true
114
- plots:
115
- experiment1:
116
- title:
117
- - train.max_epochs
118
- type: heatmap
119
- x: dataset.train_samples
120
- x_label: dataset size
121
- y_label: learning rate
122
- 'y':
123
- - test_acc
124
- - synergy
125
- - redundancy
126
  grid_search:
127
  dataset.train_samples:
128
  array:
 
 
129
  - 800
 
130
  - 1000
131
- - 1200
132
- - 1500
133
- - 2000
134
- - 3000
135
  train.lr:
136
  array:
 
137
  - 0.1
 
138
  train.train_batch_size:
139
  array:
140
  - 32
 
 
 
141
  dataset.parameters.n:
142
  array:
143
- - 30
144
  - 40
145
- model.parameters.layers.fc1:
146
- array:
147
- - 10
148
- - 12
149
- - 14
150
- - 16
151
- - 18
152
- - 20
153
- - 100
154
  experiment_oinfo_title: null
 
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
+ device: cuda
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
 
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
+ - fc3_post
29
  train: true
30
  features_type:
31
  - train
32
+ max_batch_exhaustive: 10
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
 
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
+ max_epochs: 1400
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
 
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
+ plot_dir: ./plots/gridsearch_100
102
+ plot_dir_all: ./plots/gridsearch_100/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ run_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs
104
  work_dir: ${hydra:runtime.cwd}
105
+ experiment_name: gridsearch_100
106
  seeds:
107
  - 0
108
  - 1
 
111
  - 97
112
  evaluate_generalizing_models_only: false
113
  remove_files: true
114
+ train_seeds_parallel: false
 
 
 
 
 
 
 
 
 
 
 
 
115
  grid_search:
116
  dataset.train_samples:
117
  array:
118
+ - 500
119
+ - 600
120
  - 800
121
+ - 900
122
  - 1000
123
+ - 1100
124
+ name: train_samples
 
 
125
  train.lr:
126
  array:
127
+ - 0.05
128
  - 0.1
129
+ name: lr
130
  train.train_batch_size:
131
  array:
132
  - 32
133
+ - 64
134
+ - 128
135
+ name: train_bs
136
  dataset.parameters.n:
137
  array:
 
138
  - 40
139
+ - 50
140
+ name: 'n'
141
+ plots:
142
+ training_heatmaps:
143
+ plot_1:
144
+ - dataset.train_samples
145
+ - train.train_batch_size
 
 
146
  experiment_oinfo_title: null
parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/config.yaml CHANGED
@@ -6,7 +6,7 @@ huggingface:
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
- device: cpu
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
@@ -25,11 +25,11 @@ oinformation:
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
- - fc1_post
29
  train: true
30
  features_type:
31
  - train
32
- max_batch_exhaustive: 3
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
@@ -37,7 +37,7 @@ train:
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
- max_epochs: 900
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
@@ -98,10 +98,11 @@ paths:
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
- plot_dir: ./plots/gridsearch
102
- plot_dir_all: ./plots/gridsearch/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
 
103
  work_dir: ${hydra:runtime.cwd}
104
- experiment_name: gridsearch
105
  seeds:
106
  - 0
107
  - 1
@@ -110,45 +111,36 @@ seeds:
110
  - 97
111
  evaluate_generalizing_models_only: false
112
  remove_files: true
113
- train_seeds_parallel: true
114
- plots:
115
- experiment1:
116
- title:
117
- - train.max_epochs
118
- type: heatmap
119
- x: dataset.train_samples
120
- x_label: dataset size
121
- y_label: learning rate
122
- 'y':
123
- - test_acc
124
- - synergy
125
- - redundancy
126
  grid_search:
127
  dataset.train_samples:
128
  array:
 
 
129
  - 800
 
130
  - 1000
131
- - 1200
132
- - 1500
133
- - 2000
134
- - 3000
135
  train.lr:
136
  array:
 
137
  - 0.1
 
138
  train.train_batch_size:
139
  array:
140
  - 32
 
 
 
141
  dataset.parameters.n:
142
  array:
143
- - 30
144
  - 40
145
- model.parameters.layers.fc1:
146
- array:
147
- - 10
148
- - 12
149
- - 14
150
- - 16
151
- - 18
152
- - 20
153
- - 100
154
  experiment_oinfo_title: null
 
6
  synergymask: false
7
  sparsity_sampling: 10
8
  scheduler: null
9
+ device: cuda
10
  test_seed: 123
11
  evaluate_oinformation: false
12
  evaluate_sparsity: false
 
25
  layer: fc2_post
26
  njobs: 16
27
  layers:
28
+ - fc3_post
29
  train: true
30
  features_type:
31
  - train
32
+ max_batch_exhaustive: 10
33
  loss:
34
  _target_: model.neural_network.MyHingeLoss
35
  train:
 
37
  train_batch_size: 32
38
  num_workers: 6
39
  eval_batch_size: 32
40
+ max_epochs: 1400
41
  max_steps: 1000000
42
  regularization:
43
  weight_decay:
 
98
  data_dir: ${paths.root_dir}/data/
99
  log_dir: ${paths.root_dir}/runs/
100
  output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1000_100/fcn_relu_100_0.00_default/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
101
+ plot_dir: ./plots/gridsearch_100
102
+ plot_dir_all: ./plots/gridsearch_100/all/fcn_relu/sgd_bs_32_lr_0.10/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ run_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs
104
  work_dir: ${hydra:runtime.cwd}
105
+ experiment_name: gridsearch_100
106
  seeds:
107
  - 0
108
  - 1
 
111
  - 97
112
  evaluate_generalizing_models_only: false
113
  remove_files: true
114
+ train_seeds_parallel: false
 
 
 
 
 
 
 
 
 
 
 
 
115
  grid_search:
116
  dataset.train_samples:
117
  array:
118
+ - 500
119
+ - 600
120
  - 800
121
+ - 900
122
  - 1000
123
+ - 1100
124
+ name: train_samples
 
 
125
  train.lr:
126
  array:
127
+ - 0.05
128
  - 0.1
129
+ name: lr
130
  train.train_batch_size:
131
  array:
132
  - 32
133
+ - 64
134
+ - 128
135
+ name: train_bs
136
  dataset.parameters.n:
137
  array:
 
138
  - 40
139
+ - 50
140
+ name: 'n'
141
+ plots:
142
+ training_heatmaps:
143
+ plot_1:
144
+ - dataset.train_samples
145
+ - train.train_batch_size
 
 
146
  experiment_oinfo_title: null