Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +71 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt10000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt15000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt20000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt25000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt30000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt35000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt40000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt45000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt5000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt50000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt55000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt60000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt65000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt70000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt75000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt80000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt85000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt90000.pt +3 -0
- new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt95000.pt +3 -0
- new-grid/k16_N1024/logs/plot.log +94 -0
- new-grid/k16_N1024/logs/train.log +0 -0
- new-grid/k16_N1024/plots/ablation_accuracy.png +0 -0
- new-grid/k16_N1024/plots/ablation_conditional_accuracy.png +3 -0
- new-grid/k16_N1024/plots/ablation_per_position.png +3 -0
- new-grid/k16_N1024/plots/baseline_accuracy.png +0 -0
- new-grid/k16_N1024/plots/baseline_conditional_accuracy.png +0 -0
- new-grid/k16_N1024/plots/compare_cinclogits_layer0.png +0 -0
- new-grid/k16_N1024/plots/compare_cinclogits_layer1.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer0.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer0_ub10.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer0_ub15.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer1.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer1_ub10.png +0 -0
- new-grid/k16_N1024/plots/compare_intensity_layer1_ub15.png +0 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt10000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt100000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt15000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt20000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt25000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt30000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt35000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt40000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt45000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt5000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt50000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt55000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt60000.pt +3 -0
- new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt65000.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,74 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
new-grid/k16_N1024/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
new-grid/k16_N1024/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
new-grid/k16_N128/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
new-grid/k16_N128/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
new-grid/k16_N256/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
new-grid/k16_N256/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
new-grid/k16_N256/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
new-grid/k16_N512/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
new-grid/k16_N512/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
new-grid/k32_N1024/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
new-grid/k32_N1024/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
new-grid/k32_N128/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
new-grid/k32_N128/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
new-grid/k32_N128/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
new-grid/k32_N256/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
new-grid/k32_N256/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
new-grid/k32_N256/plots/consecutive_attention.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
new-grid/k32_N256/plots/qk_cross_local.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
new-grid/k32_N256/plots/qk_cross_mean.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
new-grid/k32_N256/plots/qk_cross_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
new-grid/k32_N256/plots/qk_heatmap.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
new-grid/k32_N256/plots/qk_local_instances.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
new-grid/k32_N256/plots/qk_local_smooth.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
new-grid/k32_N256/plots/qk_monotonicity_summary.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
new-grid/k32_N256/plots/qk_neighbor_cross.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
new-grid/k32_N256/plots/qk_raw_structure.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
new-grid/k32_N512/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
new-grid/k32_N512/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
new-grid/k32_N512/plots/attn1_importance_analysis.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
new-grid/k32_N512/plots/attn_spread.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
new-grid/k32_N512/plots/attn_value_distance.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
new-grid/k32_N512/plots/consecutive_attention.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
new-grid/k32_N512/plots/consecutive_attention_grid.png filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
new-grid/k32_N512/plots/consecutive_attention_grid_ckpt60k.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
new-grid/k32_N512/plots/intervened_consecutive.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
new-grid/k32_N512/plots/intervened_consecutive_100k.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
new-grid/k32_N512/plots/intervened_consecutive_int5.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
new-grid/k32_N512/plots/l1_qk_interaction.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
new-grid/k32_N512/plots/l1_vs_l2_qk_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
new-grid/k32_N512/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
new-grid/k32_N512/plots/qk_cross_local.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
new-grid/k32_N512/plots/qk_cross_mean.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
new-grid/k32_N512/plots/qk_cross_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
new-grid/k32_N512/plots/qk_cross_with_pos.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
new-grid/k32_N512/plots/qk_deep_decomp.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
new-grid/k32_N512/plots/qk_fixed_query.png filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
new-grid/k32_N512/plots/qk_full_value_path.png filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
new-grid/k32_N512/plots/qk_heatmap.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
new-grid/k32_N512/plots/qk_heatmap_ty.png filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
new-grid/k32_N512/plots/qk_heatmap_xt.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_60k.png filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_maxscore.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_slices.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_slices2.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_slices3.png filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_split.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
new-grid/k32_N512/plots/qk_heatmap_xt_split_slices.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
new-grid/k32_N512/plots/qk_heatmap_zy.png filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
new-grid/k32_N512/plots/qk_interaction_decomp.png filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
new-grid/k32_N512/plots/qk_local_instances.png filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
new-grid/k32_N512/plots/qk_local_smooth.png filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
new-grid/k32_N512/plots/qk_monotonicity_summary.png filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
new-grid/k32_N512/plots/qk_neighbor_cross.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
new-grid/k32_N512/plots/qk_neighbor_score.png filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
new-grid/k32_N512/plots/qk_query_side_decomp.png filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
new-grid/k32_N512/plots/qk_raw_structure.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
new-grid/k32_N512/plots/qk_residual_path.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
new-grid/k32_N512/plots/qk_self_score.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
new-grid/k32_N512/plots/qk_slope_profiles.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
new-grid/k32_N512/plots/qk_value_heatmap.png filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
new-grid/k32_N512/plots/qk_value_heatmap_base.png filter=lfs diff=lfs merge=lfs -text
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt10000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8474bcdfa9e03438efc5c9b7a0e05364e02f0329f50b5f82702604068c9b4ea8
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6abd9600b9206c545cb471615315234567db7d4f26037f919854d0ea8108bfa
|
| 3 |
+
size 918949
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt15000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:504160082b9131e129fb3ce5deb9143ea8f17ce79819def70676b232248b48e4
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt20000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:629cfb28178938761ba5cf23938837c135d77df00151b4ff2235191113f37aed
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt25000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41f72b2fd86ad3acdb890ba4e8d14986c9caecaf35d275c0a01524794fe3fdbd
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt30000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f31f90f535fc57b7206b9fda66281de3ec57908bf3c42198d5f1610ef57df14b
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt35000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b4af5a7087999a895b7b10a6dfde75f9ffeaaf11f6ac9edee6aa571cb0cc8e6
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt40000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4968709f49088450d1eba71404bbc95d253b84b75119fcba4bcce6937fc4ed75
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt45000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb1aa86329960e33707032c36e750ea346b2d6575f109c11e33dfb3016fb3d8d
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt5000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb50cb1b6c97affc0bd7371ac10a58b3dedd07ccc9eae4f82e8b45ee70ac6957
|
| 3 |
+
size 918879
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt50000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fcadea35e7e7bd44d6b7c9adc7542fbc69bc8d89227da1e9454e5eba8743df9
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt55000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703ec62dbd7730a73b45248ee7c503c4409b6132f389532d1ff93f8ec5b98ebb
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt60000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe384383c23b5f0e4068fafe95e58a83b1fe0357a162aa95812fee985b3578c9
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt65000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9f50e5acb3c13da7ab1e65cd4e957ea652aa6a64def830ac799aac318c41356
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt70000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2495844b1242b7506ffa52c6c476d25d15b1cd1b69ce2582eee0974b1cfd9f5
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt75000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2930aa155863ac129951258e0b39b2c1f46616fad023e48f8769d3201362ff59
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt80000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5af4fb7088213a08bbd637ff3acbefe524f361b4cd69a443748be4a2439624a8
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt85000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01499164b32b25dfffe8272645d3614cdee749df291d6185e5482775e43009c4
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt90000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73164455b50012ca76976e8c9727c9f03487cae4755b9411f4bafe0b3b22dd2b
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt95000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b16e97b823340e10d6dd687757155bb3ac685ee677ad525216196d25c6f3ccbc
|
| 3 |
+
size 918914
|
new-grid/k16_N1024/logs/plot.log
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading checkpoint: /mnt/task_runtime/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt
|
| 2 |
+
block_size=16, vocab_n=1024, device=cuda
|
| 3 |
+
Computing baseline...
|
| 4 |
+
full_seq_acc=1.0000
|
| 5 |
+
Saved baseline plots
|
| 6 |
+
Computing ablation...
|
| 7 |
+
skip_layer=0: full_seq_acc=0.0000
|
| 8 |
+
skip_layer=1: full_seq_acc=0.0000
|
| 9 |
+
Saved ablation plots
|
| 10 |
+
Computing cinclogits...
|
| 11 |
+
Layer 0: done
|
| 12 |
+
Layer 1: done
|
| 13 |
+
Saved cinclogits plots
|
| 14 |
+
Computing intensity layer=0 ub=5...
|
| 15 |
+
WARNING: intensity=-0.50 got 3/200 valid after 2000 rounds
|
| 16 |
+
WARNING: intensity=-0.25 got 3/200 valid after 2000 rounds
|
| 17 |
+
WARNING: intensity=0.00 got 6/200 valid after 2000 rounds
|
| 18 |
+
WARNING: intensity=0.25 got 3/200 valid after 2000 rounds
|
| 19 |
+
WARNING: intensity=0.50 got 2/200 valid after 2000 rounds
|
| 20 |
+
WARNING: intensity=0.75 got 3/200 valid after 2000 rounds
|
| 21 |
+
WARNING: intensity=1.00 got 9/200 valid after 2000 rounds
|
| 22 |
+
WARNING: intensity=1.25 got 5/200 valid after 2000 rounds
|
| 23 |
+
WARNING: intensity=1.50 got 5/200 valid after 2000 rounds
|
| 24 |
+
WARNING: intensity=1.75 got 6/200 valid after 2000 rounds
|
| 25 |
+
WARNING: intensity=2.00 got 8/200 valid after 2000 rounds
|
| 26 |
+
Done
|
| 27 |
+
Computing intensity layer=0 ub=10...
|
| 28 |
+
WARNING: intensity=-0.50 got 19/200 valid after 2000 rounds
|
| 29 |
+
WARNING: intensity=-0.25 got 28/200 valid after 2000 rounds
|
| 30 |
+
WARNING: intensity=0.00 got 16/200 valid after 2000 rounds
|
| 31 |
+
WARNING: intensity=0.25 got 15/200 valid after 2000 rounds
|
| 32 |
+
WARNING: intensity=0.50 got 21/200 valid after 2000 rounds
|
| 33 |
+
WARNING: intensity=0.75 got 18/200 valid after 2000 rounds
|
| 34 |
+
WARNING: intensity=1.00 got 20/200 valid after 2000 rounds
|
| 35 |
+
WARNING: intensity=1.25 got 19/200 valid after 2000 rounds
|
| 36 |
+
WARNING: intensity=1.50 got 13/200 valid after 2000 rounds
|
| 37 |
+
WARNING: intensity=1.75 got 21/200 valid after 2000 rounds
|
| 38 |
+
WARNING: intensity=2.00 got 18/200 valid after 2000 rounds
|
| 39 |
+
Done
|
| 40 |
+
Computing intensity layer=0 ub=15...
|
| 41 |
+
WARNING: intensity=-0.50 got 49/200 valid after 2000 rounds
|
| 42 |
+
WARNING: intensity=-0.25 got 53/200 valid after 2000 rounds
|
| 43 |
+
WARNING: intensity=0.00 got 41/200 valid after 2000 rounds
|
| 44 |
+
WARNING: intensity=0.25 got 48/200 valid after 2000 rounds
|
| 45 |
+
WARNING: intensity=0.50 got 43/200 valid after 2000 rounds
|
| 46 |
+
WARNING: intensity=0.75 got 38/200 valid after 2000 rounds
|
| 47 |
+
WARNING: intensity=1.00 got 42/200 valid after 2000 rounds
|
| 48 |
+
WARNING: intensity=1.25 got 49/200 valid after 2000 rounds
|
| 49 |
+
WARNING: intensity=1.50 got 46/200 valid after 2000 rounds
|
| 50 |
+
WARNING: intensity=1.75 got 40/200 valid after 2000 rounds
|
| 51 |
+
WARNING: intensity=2.00 got 34/200 valid after 2000 rounds
|
| 52 |
+
Done
|
| 53 |
+
Computing intensity layer=1 ub=5...
|
| 54 |
+
WARNING: intensity=-0.50 got 4/200 valid after 2000 rounds
|
| 55 |
+
WARNING: intensity=-0.25 got 1/200 valid after 2000 rounds
|
| 56 |
+
WARNING: intensity=0.00 got 3/200 valid after 2000 rounds
|
| 57 |
+
WARNING: intensity=0.25 got 4/200 valid after 2000 rounds
|
| 58 |
+
WARNING: intensity=0.50 got 4/200 valid after 2000 rounds
|
| 59 |
+
WARNING: intensity=0.75 got 4/200 valid after 2000 rounds
|
| 60 |
+
WARNING: intensity=1.00 got 5/200 valid after 2000 rounds
|
| 61 |
+
WARNING: intensity=1.25 got 5/200 valid after 2000 rounds
|
| 62 |
+
WARNING: intensity=1.50 got 6/200 valid after 2000 rounds
|
| 63 |
+
WARNING: intensity=1.75 got 1/200 valid after 2000 rounds
|
| 64 |
+
WARNING: intensity=2.00 got 7/200 valid after 2000 rounds
|
| 65 |
+
Done
|
| 66 |
+
Computing intensity layer=1 ub=10...
|
| 67 |
+
WARNING: intensity=-0.50 got 26/200 valid after 2000 rounds
|
| 68 |
+
WARNING: intensity=-0.25 got 20/200 valid after 2000 rounds
|
| 69 |
+
WARNING: intensity=0.00 got 16/200 valid after 2000 rounds
|
| 70 |
+
WARNING: intensity=0.25 got 15/200 valid after 2000 rounds
|
| 71 |
+
WARNING: intensity=0.50 got 26/200 valid after 2000 rounds
|
| 72 |
+
WARNING: intensity=0.75 got 10/200 valid after 2000 rounds
|
| 73 |
+
WARNING: intensity=1.00 got 19/200 valid after 2000 rounds
|
| 74 |
+
WARNING: intensity=1.25 got 15/200 valid after 2000 rounds
|
| 75 |
+
WARNING: intensity=1.50 got 19/200 valid after 2000 rounds
|
| 76 |
+
WARNING: intensity=1.75 got 25/200 valid after 2000 rounds
|
| 77 |
+
WARNING: intensity=2.00 got 13/200 valid after 2000 rounds
|
| 78 |
+
Done
|
| 79 |
+
Computing intensity layer=1 ub=15...
|
| 80 |
+
WARNING: intensity=-0.50 got 35/200 valid after 2000 rounds
|
| 81 |
+
WARNING: intensity=-0.25 got 40/200 valid after 2000 rounds
|
| 82 |
+
WARNING: intensity=0.00 got 43/200 valid after 2000 rounds
|
| 83 |
+
WARNING: intensity=0.25 got 48/200 valid after 2000 rounds
|
| 84 |
+
WARNING: intensity=0.50 got 41/200 valid after 2000 rounds
|
| 85 |
+
WARNING: intensity=0.75 got 40/200 valid after 2000 rounds
|
| 86 |
+
WARNING: intensity=1.00 got 49/200 valid after 2000 rounds
|
| 87 |
+
WARNING: intensity=1.25 got 41/200 valid after 2000 rounds
|
| 88 |
+
WARNING: intensity=1.50 got 50/200 valid after 2000 rounds
|
| 89 |
+
WARNING: intensity=1.75 got 42/200 valid after 2000 rounds
|
| 90 |
+
WARNING: intensity=2.00 got 33/200 valid after 2000 rounds
|
| 91 |
+
Done
|
| 92 |
+
Saved intensity plots
|
| 93 |
+
|
| 94 |
+
All plots saved to /mnt/task_runtime/new-grid/k16_N1024/plots
|
new-grid/k16_N1024/logs/train.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
new-grid/k16_N1024/plots/ablation_accuracy.png
ADDED
|
new-grid/k16_N1024/plots/ablation_conditional_accuracy.png
ADDED
|
Git LFS Details
|
new-grid/k16_N1024/plots/ablation_per_position.png
ADDED
|
Git LFS Details
|
new-grid/k16_N1024/plots/baseline_accuracy.png
ADDED
|
new-grid/k16_N1024/plots/baseline_conditional_accuracy.png
ADDED
|
new-grid/k16_N1024/plots/compare_cinclogits_layer0.png
ADDED
|
new-grid/k16_N1024/plots/compare_cinclogits_layer1.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer0.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer0_ub10.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer0_ub15.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer1.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer1_ub10.png
ADDED
|
new-grid/k16_N1024/plots/compare_intensity_layer1_ub15.png
ADDED
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt10000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:765d5d9f57def63f3ede5dbbfab3b495a6a28f044ed558a5d4572b549f01202f
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt100000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0142814a4780a467a1483a32241e521cabd7ecca5a42985b26da3f1799d678a
|
| 3 |
+
size 460197
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt15000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7026ce175861b979018c67bf643e1d971079c144b315fa178f2d53c96c081bd6
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt20000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcbaa7866c2c1fa2dc4db6d456fc827a193ae56ab2fa302b7027aad6b17260b0
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt25000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d66a1d54009e8271bd08ae7fd8ee44d31ae0be2224da653b4778b64185ee6f5
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt30000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d6c3088169ff7ec63d5d923fd2fe0c19bd4ed53a2abbb59e63389d938187f8d
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt35000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1914757ce211e93f016d388279b244fb799310cc221abd7fc057fb1bf458708
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt40000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb0303c85ef22b204628a2978077504d12e897ebe98f6c858138db96c40b3406
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt45000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:204809ce16e1030d8dbd22d47bc64d52f051abdb544574a7b6dbf8e9af98a08a
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt5000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9f934045a698da4cf375b1956f42888547bca92f962b0338d81e5d62c92b8ff
|
| 3 |
+
size 460127
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt50000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0cca30da515491b950cb3ee8300b9bf0199a3badccf36ecd54ff8964819db68
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt55000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b56461e4843fda2646ae7e701dc734ee52b7d7e58416a1f524cbf5b6b765d066
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt60000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89fe6cefdeaabab231fb4819737d84d2916eef3d5a04bacf677fe1c190e6dba1
|
| 3 |
+
size 460162
|
new-grid/k16_N128/checkpoints/std0p01_iseed1__ckpt65000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18542b4c334f8a55f392e2c483047296aade91821cb5a567d465ec096401fdeb
|
| 3 |
+
size 460162
|