interpgpt-sae-phase5 / feature_diff.json
connaaa's picture
Phase 5 release: 7 TopK SAEs + specificity / null-steering JSON artifacts
5f2451e verified
{
"sae_results": {
"standard_L1": {
"final_fve": 0.9156635482984559,
"final_l0": 40.0,
"dead_pct_train": 0.0244140625,
"dead_pct_heldout": 1.5380859375
},
"standard_L2": {
"final_fve": 0.9112300092129357,
"final_l0": 40.0,
"dead_pct_train": 0.0244140625,
"dead_pct_heldout": 1.220703125
},
"standard_L3": {
"final_fve": 0.8956661837197387,
"final_l0": 40.0,
"dead_pct_train": 0.0244140625,
"dead_pct_heldout": 1.025390625
},
"adhd_L1": {
"final_fve": 0.9020028367114843,
"final_l0": 40.0,
"dead_pct_train": 0.1220703125,
"dead_pct_heldout": 1.3671875
},
"adhd_L2": {
"final_fve": 0.8937416797380295,
"final_l0": 40.0,
"dead_pct_train": 0.0244140625,
"dead_pct_heldout": 1.46484375
},
"adhd_L3": {
"final_fve": 0.8691270860953982,
"final_l0": 40.0,
"dead_pct_train": 0.0,
"dead_pct_heldout": 2.2216796875
}
},
"primary_count": 312,
"symmetry_count": 139,
"layer_control_count": 194,
"primary_top_features": [
{
"feat_id": 2418,
"adhd_rate": 0.8974166512489319,
"std_rate": 0.0010833332780748606
},
{
"feat_id": 653,
"adhd_rate": 0.8386666774749756,
"std_rate": 0.0
},
{
"feat_id": 1216,
"adhd_rate": 0.6754166483879089,
"std_rate": 0.009583333507180214
},
{
"feat_id": 225,
"adhd_rate": 0.6380833387374878,
"std_rate": 0.0
},
{
"feat_id": 1131,
"adhd_rate": 0.6314166784286499,
"std_rate": 0.0
},
{
"feat_id": 2504,
"adhd_rate": 0.546999990940094,
"std_rate": 0.0005000000237487257
},
{
"feat_id": 1650,
"adhd_rate": 0.5131666660308838,
"std_rate": 8.333333244081587e-05
},
{
"feat_id": 2959,
"adhd_rate": 0.4754999876022339,
"std_rate": 0.00016666666488163173
},
{
"feat_id": 3953,
"adhd_rate": 0.4663333296775818,
"std_rate": 0.00016666666488163173
},
{
"feat_id": 352,
"adhd_rate": 0.42516666650772095,
"std_rate": 0.0016666667070239782
},
{
"feat_id": 702,
"adhd_rate": 0.4244999885559082,
"std_rate": 0.0
},
{
"feat_id": 2505,
"adhd_rate": 0.4099166691303253,
"std_rate": 0.0
},
{
"feat_id": 1156,
"adhd_rate": 0.3932499885559082,
"std_rate": 0.0
},
{
"feat_id": 2512,
"adhd_rate": 0.38883334398269653,
"std_rate": 0.0
},
{
"feat_id": 1835,
"adhd_rate": 0.3605000078678131,
"std_rate": 0.00016666666488163173
}
]
}