kclauw commited on
Commit
cfd2af4
·
1 Parent(s): 1d81f1e

Upload folder using huggingface_hub

Browse files
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/checkpoints/checkpoints.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82c9a56a0e4634589608ad47787a72cd37ccc446e22221e28880f2be3177328
3
+ size 130
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/config.yaml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 0
2
+ runs_folder: /media/god/Second/Runs
3
+ huggingface:
4
+ repo_id: kclauw/grokking-oinformation
5
+ synergymask: false
6
+ sparsity_sampling: 10
7
+ scheduler: null
8
+ device: cpu
9
+ test_seed: 123
10
+ evaluate_oinformation: true
11
+ evaluate_sparsity: false
12
+ evaluate_norms: false
13
+ evaluate_subnetworks: false
14
+ evaluate_lottery_ticket: false
15
+ create_grid_search_file: false
16
+ create_plots: true
17
+ store_local: true
18
+ experiment_type: train_single
19
+ unique_plot_path: fcn_relu/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
20
+ store_on_scratch: true
21
+ oinformation:
22
+ sampling_rate: 1
23
+ max_batch_greedy: 40
24
+ layer: fc2_post
25
+ njobs: 16
26
+ layers:
27
+ - fc3_post
28
+ train: true
29
+ features_type:
30
+ - train
31
+ max_batch_exhaustive: 3
32
+ loss:
33
+ _target_: model.neural_network.MyHingeLoss
34
+ train:
35
+ lr: 0.2
36
+ train_batch_size: 32
37
+ num_workers: 6
38
+ eval_batch_size: 32
39
+ max_epochs: 600
40
+ regularization:
41
+ weight_decay:
42
+ name: wd
43
+ value: 0.01
44
+ type: null
45
+ dropout:
46
+ name: do
47
+ value: 0
48
+ type: dropout
49
+ batchnorm:
50
+ name: bn
51
+ value: false
52
+ type: batchnorm
53
+ oinfo_syn:
54
+ name: syn
55
+ value: 0
56
+ type: null
57
+ layernorm:
58
+ name: ln
59
+ value: false
60
+ type: layernorm
61
+ oinfo_red:
62
+ name: red
63
+ value: 0
64
+ wandb:
65
+ enabled: false
66
+ project: grokking_replica
67
+ unique_filename: parity_n_40_k_3_N_1600_100#fcn_relu_20_20_20_0.00_default#sgd_bs_32_lr_0.20#wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
68
+ regularization_name: null
69
+ dataset:
70
+ train_samples: 1600
71
+ test_samples: 100
72
+ parameters:
73
+ _target_: dataloaders.sparse_parity.parity
74
+ 'n': 40
75
+ k: 3
76
+ name: parity_n_${dataset.parameters.n}_k_${dataset.parameters.k}_N_${dataset.train_samples}_${dataset.test_samples}
77
+ model:
78
+ parameters:
79
+ _target_: src.model.neural_network.DynamicFFN
80
+ activation: relu
81
+ initialization: default
82
+ layers:
83
+ fc1: 20
84
+ fc2: 20
85
+ fc3: 20
86
+ initialization_scale: 0.0
87
+ constrained_norm: false
88
+ name: fcn_${model.parameters.activation}
89
+ layer_names:
90
+ - fc1
91
+ - fc2
92
+ - fc3
93
+ optimizer:
94
+ name: sgd
95
+ parameters:
96
+ _target_: torch.optim.SGD
97
+ weight_decay: ${regularization.weight_decay.value}
98
+ paths:
99
+ root_dir: .
100
+ data_dir: ${paths.root_dir}/data/
101
+ log_dir: ${paths.root_dir}/runs/
102
+ output_dir: /kyukon/scratch/gent/433/vsc43397/oinformation-grokking/runs/parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
103
+ plot_dir: ./plots/1_baseline_datasize_vs_lr_small_100_neurons
104
+ plot_dir_all: ./plots/1_baseline_datasize_vs_lr_small_100_neurons/all/fcn_relu/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0
105
+ work_dir: ${hydra:runtime.cwd}
106
+ experiment_name: 1_baseline_datasize_vs_lr_small_100_neurons
107
+ seeds:
108
+ - 97
109
+ evaluate_generalizing_models_only: false
110
+ remove_files: true
111
+ plots:
112
+ experiment1:
113
+ title:
114
+ - train.max_epochs
115
+ type: heatmap
116
+ x: dataset.train_samples
117
+ x_label: dataset size
118
+ y_label: learning rate
119
+ 'y':
120
+ - test_acc
121
+ - synergy
122
+ - redundancy
123
+ grid_search:
124
+ dataset.train_samples:
125
+ scale: null
126
+ min: 800
127
+ max: 2000
128
+ num: 4
129
+ type: integer
130
+ train.lr:
131
+ min: 0.01
132
+ max: 0.2
133
+ num: 6
134
+ type: uniform
135
+ experiment_oinfo_title: null
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/epoch_results_600_seed_97.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04899b9643aa498d22194a67e97bf855a34258f60da00c6523511bfc3ed8741e
3
+ size 53705
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/oinformation_results_train_600.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c3f48435004c6d6320dabd4edb7e892da37160e8a3282ffcf995f75c3acfa5
3
+ size 2853528
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/optimal_oinformation_results_train_600.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f696a6990be07fa609db2909b9e9d82a42096598f500dbd90ad5c54987a1f89
3
+ size 371997
parity_n_40_k_3_N_1600_100/fcn_relu_20_20_20_0.00_default/sgd_bs_32_lr_0.20/wd_0.01_do_0_no_bn_syn_0_no_ln_red_0/seed_97/optimal_results_seed_600_97.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d1a4931adb924dfe0bfb605dff4cfd3fd77a3ed6fe5ca0692991552f30aa3b
3
+ size 97