han1823123123 commited on
Commit
6291a90
·
verified ·
1 Parent(s): 23575ae

Upload training_logs/txcdr_t28__seed1.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_logs/txcdr_t28__seed1.json +105 -0
training_logs/txcdr_t28__seed1.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 127048.21875,
4
+ 9010.2509765625,
5
+ 8322.990234375,
6
+ 8019.96240234375,
7
+ 7647.80712890625,
8
+ 7438.34814453125,
9
+ 7287.4091796875,
10
+ 7144.302734375,
11
+ 7087.75927734375,
12
+ 6982.732421875,
13
+ 6938.48486328125,
14
+ 6905.73486328125,
15
+ 6874.05615234375,
16
+ 6854.83154296875,
17
+ 6804.0595703125,
18
+ 6737.779296875,
19
+ 6765.11083984375,
20
+ 6732.81982421875,
21
+ 6722.30810546875,
22
+ 6704.28662109375,
23
+ 6705.99267578125
24
+ ],
25
+ "l0": [
26
+ 500.0,
27
+ 496.436767578125,
28
+ 495.828369140625,
29
+ 496.11669921875,
30
+ 495.3564453125,
31
+ 495.84716796875,
32
+ 495.854248046875,
33
+ 496.427490234375,
34
+ 496.763427734375,
35
+ 494.985107421875,
36
+ 495.532470703125,
37
+ 496.35205078125,
38
+ 496.461181640625,
39
+ 496.23974609375,
40
+ 495.359375,
41
+ 495.136474609375,
42
+ 496.354248046875,
43
+ 495.443115234375,
44
+ 496.313720703125,
45
+ 495.83984375,
46
+ 494.866455078125
47
+ ],
48
+ "steps_logged": [
49
+ 0,
50
+ 200,
51
+ 400,
52
+ 600,
53
+ 800,
54
+ 1000,
55
+ 1200,
56
+ 1400,
57
+ 1600,
58
+ 1800,
59
+ 2000,
60
+ 2200,
61
+ 2400,
62
+ 2600,
63
+ 2800,
64
+ 3000,
65
+ 3200,
66
+ 3400,
67
+ 3600,
68
+ 3800,
69
+ 4000
70
+ ],
71
+ "final_step": 4000,
72
+ "converged": true,
73
+ "plateau_last": 0.015974250598195773,
74
+ "elapsed_s": 4118.382341384888,
75
+ "row": 28,
76
+ "arch_id": "txcdr_t28",
77
+ "arch": "txcdr_t28",
78
+ "group": 3,
79
+ "src_class": "TemporalCrosscoder",
80
+ "src_module": "src.architectures.crosscoder",
81
+ "T": 28,
82
+ "T_max": null,
83
+ "t_sample": null,
84
+ "n_layers": null,
85
+ "k_win": 500,
86
+ "k_pos": 18,
87
+ "shifts": null,
88
+ "alpha": null,
89
+ "gamma": null,
90
+ "n_scales": null,
91
+ "seed": 1,
92
+ "d_in": 2304,
93
+ "d_sae": 18432,
94
+ "subject_model": "google/gemma-2-2b",
95
+ "anchor_layer": 12,
96
+ "mlc_layers": [
97
+ 10,
98
+ 11,
99
+ 12,
100
+ 13,
101
+ 14
102
+ ],
103
+ "phase": "phase7_unification",
104
+ "run_id": "txcdr_t28__seed1"
105
+ }