jrhuebers committed on
Commit
d561a97
·
verified ·
1 Parent(s): 838d00d

Upload FIM-ODE vdp1 training config, logging, etc...

Browse files
vdp1/vdp1_01-29-0707/logging/tensorboard/events.out.tfevents.1769670449.ml2ran03.lamarr.tu-dortmund.de.579545.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a39e9c56be94b85a0b4e04c07fbf4a9ce69dc792353eec17009b000f8ba44409
3
+ size 88
vdp1/vdp1_01-29-0707/logging/tensorboard/events.out.tfevents.1769670450.ml2ran03.lamarr.tu-dortmund.de.579545.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af41f211ff1f57ac49b8a96cfdd9cd21b441fabcfaf9bd69db5a5588cfa362a
3
+ size 1084965
vdp1/vdp1_01-29-0707/logging/train.log ADDED
The diff for this file is too large to render. See raw diff
 
vdp1/vdp1_01-29-0707/model_architecture.txt ADDED
@@ -0,0 +1,718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ==============================================================================================================
2
+ Layer (type:depth-idx) Output Shape Param #
3
+ ==============================================================================================================
4
+ TrainingWrapper -- 4,870,401
5
+ ├─FimOdeon: 1-1 -- --
6
+ │ └─TrajectoryEncoder: 2-1 -- 896
7
+ │ │ └─TransformerEncoder: 3-1 [1, 49, 256] 1,579,520
8
+ │ └─Sequential: 2-200 -- (recursive)
9
+ │ │ └─Linear: 3-2 [1, 3, 256] 1,024
10
+ │ │ └─ReLU: 3-3 [1, 3, 256] --
11
+ │ │ └─Linear: 3-4 [1, 3, 256] 65,792
12
+ │ └─AttentionOperator: 2-201 -- (recursive)
13
+ │ │ └─ModuleList: 3-500 -- (recursive)
14
+ │ │ └─MLP: 3-6 [1, 3, 3] 132,355
15
+ │ └─Sequential: 2-200 -- (recursive)
16
+ │ │ └─Linear: 3-7 [1, 3, 256] (recursive)
17
+ │ │ └─ReLU: 3-8 [1, 3, 256] --
18
+ │ │ └─Linear: 3-9 [1, 3, 256] (recursive)
19
+ │ └─AttentionOperator: 2-201 -- (recursive)
20
+ │ │ └─ModuleList: 3-500 -- (recursive)
21
+ │ │ └─MLP: 3-11 [1, 3, 3] (recursive)
22
+ │ └─Sequential: 2-200 -- (recursive)
23
+ │ │ └─Linear: 3-12 [1, 3, 256] (recursive)
24
+ │ │ └─ReLU: 3-13 [1, 3, 256] --
25
+ │ │ └─Linear: 3-14 [1, 3, 256] (recursive)
26
+ │ └─AttentionOperator: 2-201 -- (recursive)
27
+ │ │ └─ModuleList: 3-500 -- (recursive)
28
+ │ │ └─MLP: 3-16 [1, 3, 3] (recursive)
29
+ │ └─Sequential: 2-200 -- (recursive)
30
+ │ │ └─Linear: 3-17 [1, 3, 256] (recursive)
31
+ │ │ └─ReLU: 3-18 [1, 3, 256] --
32
+ │ │ └─Linear: 3-19 [1, 3, 256] (recursive)
33
+ │ └─AttentionOperator: 2-201 -- (recursive)
34
+ │ │ └─ModuleList: 3-500 -- (recursive)
35
+ │ │ └─MLP: 3-21 [1, 3, 3] (recursive)
36
+ │ └─Sequential: 2-200 -- (recursive)
37
+ │ │ └─Linear: 3-22 [1, 3, 256] (recursive)
38
+ │ │ └─ReLU: 3-23 [1, 3, 256] --
39
+ │ │ └─Linear: 3-24 [1, 3, 256] (recursive)
40
+ │ └─AttentionOperator: 2-201 -- (recursive)
41
+ │ │ └─ModuleList: 3-500 -- (recursive)
42
+ │ │ └─MLP: 3-26 [1, 3, 3] (recursive)
43
+ │ └─Sequential: 2-200 -- (recursive)
44
+ │ │ └─Linear: 3-27 [1, 3, 256] (recursive)
45
+ │ │ └─ReLU: 3-28 [1, 3, 256] --
46
+ │ │ └─Linear: 3-29 [1, 3, 256] (recursive)
47
+ │ └─AttentionOperator: 2-201 -- (recursive)
48
+ │ │ └─ModuleList: 3-500 -- (recursive)
49
+ │ │ └─MLP: 3-31 [1, 3, 3] (recursive)
50
+ │ └─Sequential: 2-200 -- (recursive)
51
+ │ │ └─Linear: 3-32 [1, 3, 256] (recursive)
52
+ │ │ └─ReLU: 3-33 [1, 3, 256] --
53
+ │ │ └─Linear: 3-34 [1, 3, 256] (recursive)
54
+ │ └─AttentionOperator: 2-201 -- (recursive)
55
+ │ │ └─ModuleList: 3-500 -- (recursive)
56
+ │ │ └─MLP: 3-36 [1, 3, 3] (recursive)
57
+ │ └─Sequential: 2-200 -- (recursive)
58
+ │ │ └─Linear: 3-37 [1, 3, 256] (recursive)
59
+ │ │ └─ReLU: 3-38 [1, 3, 256] --
60
+ │ │ └─Linear: 3-39 [1, 3, 256] (recursive)
61
+ │ └─AttentionOperator: 2-201 -- (recursive)
62
+ │ │ └─ModuleList: 3-500 -- (recursive)
63
+ │ │ └─MLP: 3-41 [1, 3, 3] (recursive)
64
+ │ └─Sequential: 2-200 -- (recursive)
65
+ │ │ └─Linear: 3-42 [1, 3, 256] (recursive)
66
+ │ │ └─ReLU: 3-43 [1, 3, 256] --
67
+ │ │ └─Linear: 3-44 [1, 3, 256] (recursive)
68
+ │ └─AttentionOperator: 2-201 -- (recursive)
69
+ │ │ └─ModuleList: 3-500 -- (recursive)
70
+ │ │ └─MLP: 3-46 [1, 3, 3] (recursive)
71
+ │ └─Sequential: 2-200 -- (recursive)
72
+ │ │ └─Linear: 3-47 [1, 3, 256] (recursive)
73
+ │ │ └─ReLU: 3-48 [1, 3, 256] --
74
+ │ │ └─Linear: 3-49 [1, 3, 256] (recursive)
75
+ │ └─AttentionOperator: 2-201 -- (recursive)
76
+ │ │ └─ModuleList: 3-500 -- (recursive)
77
+ │ │ └─MLP: 3-51 [1, 3, 3] (recursive)
78
+ │ └─Sequential: 2-200 -- (recursive)
79
+ │ │ └─Linear: 3-52 [1, 3, 256] (recursive)
80
+ │ │ └─ReLU: 3-53 [1, 3, 256] --
81
+ │ │ └─Linear: 3-54 [1, 3, 256] (recursive)
82
+ │ └─AttentionOperator: 2-201 -- (recursive)
83
+ │ │ └─ModuleList: 3-500 -- (recursive)
84
+ │ │ └─MLP: 3-56 [1, 3, 3] (recursive)
85
+ │ └─Sequential: 2-200 -- (recursive)
86
+ │ │ └─Linear: 3-57 [1, 3, 256] (recursive)
87
+ │ │ └─ReLU: 3-58 [1, 3, 256] --
88
+ │ │ └─Linear: 3-59 [1, 3, 256] (recursive)
89
+ │ └─AttentionOperator: 2-201 -- (recursive)
90
+ │ │ └─ModuleList: 3-500 -- (recursive)
91
+ │ │ └─MLP: 3-61 [1, 3, 3] (recursive)
92
+ │ └─Sequential: 2-200 -- (recursive)
93
+ │ │ └─Linear: 3-62 [1, 3, 256] (recursive)
94
+ │ │ └─ReLU: 3-63 [1, 3, 256] --
95
+ │ │ └─Linear: 3-64 [1, 3, 256] (recursive)
96
+ │ └─AttentionOperator: 2-201 -- (recursive)
97
+ │ │ └─ModuleList: 3-500 -- (recursive)
98
+ │ │ └─MLP: 3-66 [1, 3, 3] (recursive)
99
+ │ └─Sequential: 2-200 -- (recursive)
100
+ │ │ └─Linear: 3-67 [1, 3, 256] (recursive)
101
+ │ │ └─ReLU: 3-68 [1, 3, 256] --
102
+ │ │ └─Linear: 3-69 [1, 3, 256] (recursive)
103
+ │ └─AttentionOperator: 2-201 -- (recursive)
104
+ │ │ └─ModuleList: 3-500 -- (recursive)
105
+ │ │ └─MLP: 3-71 [1, 3, 3] (recursive)
106
+ │ └─Sequential: 2-200 -- (recursive)
107
+ │ │ └─Linear: 3-72 [1, 3, 256] (recursive)
108
+ │ │ └─ReLU: 3-73 [1, 3, 256] --
109
+ │ │ └─Linear: 3-74 [1, 3, 256] (recursive)
110
+ │ └─AttentionOperator: 2-201 -- (recursive)
111
+ │ │ └─ModuleList: 3-500 -- (recursive)
112
+ │ │ └─MLP: 3-76 [1, 3, 3] (recursive)
113
+ │ └─Sequential: 2-200 -- (recursive)
114
+ │ │ └─Linear: 3-77 [1, 3, 256] (recursive)
115
+ │ │ └─ReLU: 3-78 [1, 3, 256] --
116
+ │ │ └─Linear: 3-79 [1, 3, 256] (recursive)
117
+ │ └─AttentionOperator: 2-201 -- (recursive)
118
+ │ │ └─ModuleList: 3-500 -- (recursive)
119
+ │ │ └─MLP: 3-81 [1, 3, 3] (recursive)
120
+ │ └─Sequential: 2-200 -- (recursive)
121
+ │ │ └─Linear: 3-82 [1, 3, 256] (recursive)
122
+ │ │ └─ReLU: 3-83 [1, 3, 256] --
123
+ │ │ └─Linear: 3-84 [1, 3, 256] (recursive)
124
+ │ └─AttentionOperator: 2-201 -- (recursive)
125
+ │ │ └─ModuleList: 3-500 -- (recursive)
126
+ │ │ └─MLP: 3-86 [1, 3, 3] (recursive)
127
+ │ └─Sequential: 2-200 -- (recursive)
128
+ │ │ └─Linear: 3-87 [1, 3, 256] (recursive)
129
+ │ │ └─ReLU: 3-88 [1, 3, 256] --
130
+ │ │ └─Linear: 3-89 [1, 3, 256] (recursive)
131
+ │ └─AttentionOperator: 2-201 -- (recursive)
132
+ │ │ └─ModuleList: 3-500 -- (recursive)
133
+ │ │ └─MLP: 3-91 [1, 3, 3] (recursive)
134
+ │ └─Sequential: 2-200 -- (recursive)
135
+ │ │ └─Linear: 3-92 [1, 3, 256] (recursive)
136
+ │ │ └─ReLU: 3-93 [1, 3, 256] --
137
+ │ │ └─Linear: 3-94 [1, 3, 256] (recursive)
138
+ │ └─AttentionOperator: 2-201 -- (recursive)
139
+ │ │ └─ModuleList: 3-500 -- (recursive)
140
+ │ │ └─MLP: 3-96 [1, 3, 3] (recursive)
141
+ │ └─Sequential: 2-200 -- (recursive)
142
+ │ │ └─Linear: 3-97 [1, 3, 256] (recursive)
143
+ │ │ └─ReLU: 3-98 [1, 3, 256] --
144
+ │ │ └─Linear: 3-99 [1, 3, 256] (recursive)
145
+ │ └─AttentionOperator: 2-201 -- (recursive)
146
+ │ │ └─ModuleList: 3-500 -- (recursive)
147
+ │ │ └─MLP: 3-101 [1, 3, 3] (recursive)
148
+ │ └─Sequential: 2-200 -- (recursive)
149
+ │ │ └─Linear: 3-102 [1, 3, 256] (recursive)
150
+ │ │ └─ReLU: 3-103 [1, 3, 256] --
151
+ │ │ └─Linear: 3-104 [1, 3, 256] (recursive)
152
+ │ └─AttentionOperator: 2-201 -- (recursive)
153
+ │ │ └─ModuleList: 3-500 -- (recursive)
154
+ │ │ └─MLP: 3-106 [1, 3, 3] (recursive)
155
+ │ └─Sequential: 2-200 -- (recursive)
156
+ │ │ └─Linear: 3-107 [1, 3, 256] (recursive)
157
+ │ │ └─ReLU: 3-108 [1, 3, 256] --
158
+ │ │ └─Linear: 3-109 [1, 3, 256] (recursive)
159
+ │ └─AttentionOperator: 2-201 -- (recursive)
160
+ │ │ └─ModuleList: 3-500 -- (recursive)
161
+ │ │ └─MLP: 3-111 [1, 3, 3] (recursive)
162
+ │ └─Sequential: 2-200 -- (recursive)
163
+ │ │ └─Linear: 3-112 [1, 3, 256] (recursive)
164
+ │ │ └─ReLU: 3-113 [1, 3, 256] --
165
+ │ │ └─Linear: 3-114 [1, 3, 256] (recursive)
166
+ │ └─AttentionOperator: 2-201 -- (recursive)
167
+ │ │ └─ModuleList: 3-500 -- (recursive)
168
+ │ │ └─MLP: 3-116 [1, 3, 3] (recursive)
169
+ │ └─Sequential: 2-200 -- (recursive)
170
+ │ │ └─Linear: 3-117 [1, 3, 256] (recursive)
171
+ │ │ └─ReLU: 3-118 [1, 3, 256] --
172
+ │ │ └─Linear: 3-119 [1, 3, 256] (recursive)
173
+ │ └─AttentionOperator: 2-201 -- (recursive)
174
+ │ │ └─ModuleList: 3-500 -- (recursive)
175
+ │ │ └─MLP: 3-121 [1, 3, 3] (recursive)
176
+ │ └─Sequential: 2-200 -- (recursive)
177
+ │ │ └─Linear: 3-122 [1, 3, 256] (recursive)
178
+ │ │ └─ReLU: 3-123 [1, 3, 256] --
179
+ │ │ └─Linear: 3-124 [1, 3, 256] (recursive)
180
+ │ └─AttentionOperator: 2-201 -- (recursive)
181
+ │ │ └─ModuleList: 3-500 -- (recursive)
182
+ │ │ └─MLP: 3-126 [1, 3, 3] (recursive)
183
+ │ └─Sequential: 2-200 -- (recursive)
184
+ │ │ └─Linear: 3-127 [1, 3, 256] (recursive)
185
+ │ │ └─ReLU: 3-128 [1, 3, 256] --
186
+ │ │ └─Linear: 3-129 [1, 3, 256] (recursive)
187
+ │ └─AttentionOperator: 2-201 -- (recursive)
188
+ │ │ └─ModuleList: 3-500 -- (recursive)
189
+ │ │ └─MLP: 3-131 [1, 3, 3] (recursive)
190
+ │ └─Sequential: 2-200 -- (recursive)
191
+ │ │ └─Linear: 3-132 [1, 3, 256] (recursive)
192
+ │ │ └─ReLU: 3-133 [1, 3, 256] --
193
+ │ │ └─Linear: 3-134 [1, 3, 256] (recursive)
194
+ │ └─AttentionOperator: 2-201 -- (recursive)
195
+ │ │ └─ModuleList: 3-500 -- (recursive)
196
+ │ │ └─MLP: 3-136 [1, 3, 3] (recursive)
197
+ │ └─Sequential: 2-200 -- (recursive)
198
+ │ │ └─Linear: 3-137 [1, 3, 256] (recursive)
199
+ │ │ └─ReLU: 3-138 [1, 3, 256] --
200
+ │ │ └─Linear: 3-139 [1, 3, 256] (recursive)
201
+ │ └─AttentionOperator: 2-201 -- (recursive)
202
+ │ │ └─ModuleList: 3-500 -- (recursive)
203
+ │ │ └─MLP: 3-141 [1, 3, 3] (recursive)
204
+ │ └─Sequential: 2-200 -- (recursive)
205
+ │ │ └─Linear: 3-142 [1, 3, 256] (recursive)
206
+ │ │ └─ReLU: 3-143 [1, 3, 256] --
207
+ │ │ └─Linear: 3-144 [1, 3, 256] (recursive)
208
+ │ └─AttentionOperator: 2-201 -- (recursive)
209
+ │ │ └─ModuleList: 3-500 -- (recursive)
210
+ │ │ └─MLP: 3-146 [1, 3, 3] (recursive)
211
+ │ └─Sequential: 2-200 -- (recursive)
212
+ │ │ └─Linear: 3-147 [1, 3, 256] (recursive)
213
+ │ │ └─ReLU: 3-148 [1, 3, 256] --
214
+ │ │ └─Linear: 3-149 [1, 3, 256] (recursive)
215
+ │ └─AttentionOperator: 2-201 -- (recursive)
216
+ │ │ └─ModuleList: 3-500 -- (recursive)
217
+ │ │ └─MLP: 3-151 [1, 3, 3] (recursive)
218
+ │ └─Sequential: 2-200 -- (recursive)
219
+ │ │ └─Linear: 3-152 [1, 3, 256] (recursive)
220
+ │ │ └─ReLU: 3-153 [1, 3, 256] --
221
+ │ │ └─Linear: 3-154 [1, 3, 256] (recursive)
222
+ │ └─AttentionOperator: 2-201 -- (recursive)
223
+ │ │ └─ModuleList: 3-500 -- (recursive)
224
+ │ │ └─MLP: 3-156 [1, 3, 3] (recursive)
225
+ │ └─Sequential: 2-200 -- (recursive)
226
+ │ │ └─Linear: 3-157 [1, 3, 256] (recursive)
227
+ │ │ └─ReLU: 3-158 [1, 3, 256] --
228
+ │ │ └─Linear: 3-159 [1, 3, 256] (recursive)
229
+ │ └─AttentionOperator: 2-201 -- (recursive)
230
+ │ │ └─ModuleList: 3-500 -- (recursive)
231
+ │ │ └─MLP: 3-161 [1, 3, 3] (recursive)
232
+ │ └─Sequential: 2-200 -- (recursive)
233
+ │ │ └─Linear: 3-162 [1, 3, 256] (recursive)
234
+ │ │ └─ReLU: 3-163 [1, 3, 256] --
235
+ │ │ └─Linear: 3-164 [1, 3, 256] (recursive)
236
+ │ └─AttentionOperator: 2-201 -- (recursive)
237
+ │ │ └─ModuleList: 3-500 -- (recursive)
238
+ │ │ └─MLP: 3-166 [1, 3, 3] (recursive)
239
+ │ └─Sequential: 2-200 -- (recursive)
240
+ │ │ └─Linear: 3-167 [1, 3, 256] (recursive)
241
+ │ │ └─ReLU: 3-168 [1, 3, 256] --
242
+ │ │ └─Linear: 3-169 [1, 3, 256] (recursive)
243
+ │ └─AttentionOperator: 2-201 -- (recursive)
244
+ │ │ └─ModuleList: 3-500 -- (recursive)
245
+ │ │ └─MLP: 3-171 [1, 3, 3] (recursive)
246
+ │ └─Sequential: 2-200 -- (recursive)
247
+ │ │ └─Linear: 3-172 [1, 3, 256] (recursive)
248
+ │ │ └─ReLU: 3-173 [1, 3, 256] --
249
+ │ │ └─Linear: 3-174 [1, 3, 256] (recursive)
250
+ │ └─AttentionOperator: 2-201 -- (recursive)
251
+ │ │ └─ModuleList: 3-500 -- (recursive)
252
+ │ │ └─MLP: 3-176 [1, 3, 3] (recursive)
253
+ │ └─Sequential: 2-200 -- (recursive)
254
+ │ │ └─Linear: 3-177 [1, 3, 256] (recursive)
255
+ │ │ └─ReLU: 3-178 [1, 3, 256] --
256
+ │ │ └─Linear: 3-179 [1, 3, 256] (recursive)
257
+ │ └─AttentionOperator: 2-201 -- (recursive)
258
+ │ │ └─ModuleList: 3-500 -- (recursive)
259
+ │ │ └─MLP: 3-181 [1, 3, 3] (recursive)
260
+ │ └─Sequential: 2-200 -- (recursive)
261
+ │ │ └─Linear: 3-182 [1, 3, 256] (recursive)
262
+ │ │ └─ReLU: 3-183 [1, 3, 256] --
263
+ │ │ └─Linear: 3-184 [1, 3, 256] (recursive)
264
+ │ └─AttentionOperator: 2-201 -- (recursive)
265
+ │ │ └─ModuleList: 3-500 -- (recursive)
266
+ │ │ └─MLP: 3-186 [1, 3, 3] (recursive)
267
+ │ └─Sequential: 2-200 -- (recursive)
268
+ │ │ └─Linear: 3-187 [1, 3, 256] (recursive)
269
+ │ │ └─ReLU: 3-188 [1, 3, 256] --
270
+ │ │ └─Linear: 3-189 [1, 3, 256] (recursive)
271
+ │ └─AttentionOperator: 2-201 -- (recursive)
272
+ │ │ └─ModuleList: 3-500 -- (recursive)
273
+ │ │ └─MLP: 3-191 [1, 3, 3] (recursive)
274
+ │ └─Sequential: 2-200 -- (recursive)
275
+ │ │ └─Linear: 3-192 [1, 3, 256] (recursive)
276
+ │ │ └─ReLU: 3-193 [1, 3, 256] --
277
+ │ │ └─Linear: 3-194 [1, 3, 256] (recursive)
278
+ │ └─AttentionOperator: 2-201 -- (recursive)
279
+ │ │ └─ModuleList: 3-500 -- (recursive)
280
+ │ │ └─MLP: 3-196 [1, 3, 3] (recursive)
281
+ │ └─Sequential: 2-200 -- (recursive)
282
+ │ │ └─Linear: 3-197 [1, 3, 256] (recursive)
283
+ │ │ └─ReLU: 3-198 [1, 3, 256] --
284
+ │ │ └─Linear: 3-199 [1, 3, 256] (recursive)
285
+ │ └─AttentionOperator: 2-201 -- (recursive)
286
+ │ │ └─ModuleList: 3-500 -- (recursive)
287
+ │ │ └─MLP: 3-201 [1, 3, 3] (recursive)
288
+ │ └─Sequential: 2-200 -- (recursive)
289
+ │ │ └─Linear: 3-202 [1, 3, 256] (recursive)
290
+ │ │ └─ReLU: 3-203 [1, 3, 256] --
291
+ │ │ └─Linear: 3-204 [1, 3, 256] (recursive)
292
+ │ └─AttentionOperator: 2-201 -- (recursive)
293
+ │ │ └─ModuleList: 3-500 -- (recursive)
294
+ │ │ └─MLP: 3-206 [1, 3, 3] (recursive)
295
+ │ └─Sequential: 2-200 -- (recursive)
296
+ │ │ └─Linear: 3-207 [1, 3, 256] (recursive)
297
+ │ │ └─ReLU: 3-208 [1, 3, 256] --
298
+ │ │ └─Linear: 3-209 [1, 3, 256] (recursive)
299
+ │ └─AttentionOperator: 2-201 -- (recursive)
300
+ │ │ └─ModuleList: 3-500 -- (recursive)
301
+ │ │ └─MLP: 3-211 [1, 3, 3] (recursive)
302
+ │ └─Sequential: 2-200 -- (recursive)
303
+ │ │ └─Linear: 3-212 [1, 3, 256] (recursive)
304
+ │ │ └─ReLU: 3-213 [1, 3, 256] --
305
+ │ │ └─Linear: 3-214 [1, 3, 256] (recursive)
306
+ │ └─AttentionOperator: 2-201 -- (recursive)
307
+ │ │ └─ModuleList: 3-500 -- (recursive)
308
+ │ │ └─MLP: 3-216 [1, 3, 3] (recursive)
309
+ │ └─Sequential: 2-200 -- (recursive)
310
+ │ │ └─Linear: 3-217 [1, 3, 256] (recursive)
311
+ │ │ └─ReLU: 3-218 [1, 3, 256] --
312
+ │ │ └─Linear: 3-219 [1, 3, 256] (recursive)
313
+ │ └─AttentionOperator: 2-201 -- (recursive)
314
+ │ │ └─ModuleList: 3-500 -- (recursive)
315
+ │ │ └─MLP: 3-221 [1, 3, 3] (recursive)
316
+ │ └─Sequential: 2-200 -- (recursive)
317
+ │ │ └─Linear: 3-222 [1, 3, 256] (recursive)
318
+ │ │ └─ReLU: 3-223 [1, 3, 256] --
319
+ │ │ └─Linear: 3-224 [1, 3, 256] (recursive)
320
+ │ └─AttentionOperator: 2-201 -- (recursive)
321
+ │ │ └─ModuleList: 3-500 -- (recursive)
322
+ │ │ └─MLP: 3-226 [1, 3, 3] (recursive)
323
+ │ └─Sequential: 2-200 -- (recursive)
324
+ │ │ └─Linear: 3-227 [1, 3, 256] (recursive)
325
+ │ │ └─ReLU: 3-228 [1, 3, 256] --
326
+ │ │ └─Linear: 3-229 [1, 3, 256] (recursive)
327
+ │ └─AttentionOperator: 2-201 -- (recursive)
328
+ │ │ └─ModuleList: 3-500 -- (recursive)
329
+ │ │ └─MLP: 3-231 [1, 3, 3] (recursive)
330
+ │ └─Sequential: 2-200 -- (recursive)
331
+ │ │ └─Linear: 3-232 [1, 3, 256] (recursive)
332
+ │ │ └─ReLU: 3-233 [1, 3, 256] --
333
+ │ │ └─Linear: 3-234 [1, 3, 256] (recursive)
334
+ │ └─AttentionOperator: 2-201 -- (recursive)
335
+ │ │ └─ModuleList: 3-500 -- (recursive)
336
+ │ │ └─MLP: 3-236 [1, 3, 3] (recursive)
337
+ │ └─Sequential: 2-200 -- (recursive)
338
+ │ │ └─Linear: 3-237 [1, 3, 256] (recursive)
339
+ │ │ └─ReLU: 3-238 [1, 3, 256] --
340
+ │ │ └─Linear: 3-239 [1, 3, 256] (recursive)
341
+ │ └─AttentionOperator: 2-201 -- (recursive)
342
+ │ │ └─ModuleList: 3-500 -- (recursive)
343
+ │ │ └─MLP: 3-241 [1, 3, 3] (recursive)
344
+ │ └─Sequential: 2-200 -- (recursive)
345
+ │ │ └─Linear: 3-242 [1, 3, 256] (recursive)
346
+ │ │ └─ReLU: 3-243 [1, 3, 256] --
347
+ │ │ └─Linear: 3-244 [1, 3, 256] (recursive)
348
+ │ └─AttentionOperator: 2-201 -- (recursive)
349
+ │ │ └─ModuleList: 3-500 -- (recursive)
350
+ │ │ └─MLP: 3-246 [1, 3, 3] (recursive)
351
+ │ └─Sequential: 2-200 -- (recursive)
352
+ │ │ └─Linear: 3-247 [1, 3, 256] (recursive)
353
+ │ │ └─ReLU: 3-248 [1, 3, 256] --
354
+ │ │ └─Linear: 3-249 [1, 3, 256] (recursive)
355
+ │ └─AttentionOperator: 2-201 -- (recursive)
356
+ │ │ └─ModuleList: 3-500 -- (recursive)
357
+ │ │ └─MLP: 3-251 [1, 3, 3] (recursive)
358
+ │ └─Sequential: 2-200 -- (recursive)
359
+ │ │ └─Linear: 3-252 [1, 3, 256] (recursive)
360
+ │ │ └─ReLU: 3-253 [1, 3, 256] --
361
+ │ │ └─Linear: 3-254 [1, 3, 256] (recursive)
362
+ │ └─AttentionOperator: 2-201 -- (recursive)
363
+ │ │ └─ModuleList: 3-500 -- (recursive)
364
+ │ │ └─MLP: 3-256 [1, 3, 3] (recursive)
365
+ │ └─Sequential: 2-200 -- (recursive)
366
+ │ │ └─Linear: 3-257 [1, 3, 256] (recursive)
367
+ │ │ └─ReLU: 3-258 [1, 3, 256] --
368
+ │ │ └─Linear: 3-259 [1, 3, 256] (recursive)
369
+ │ └─AttentionOperator: 2-201 -- (recursive)
370
+ │ │ └─ModuleList: 3-500 -- (recursive)
371
+ │ │ └─MLP: 3-261 [1, 3, 3] (recursive)
372
+ │ └─Sequential: 2-200 -- (recursive)
373
+ │ │ └─Linear: 3-262 [1, 3, 256] (recursive)
374
+ │ │ └─ReLU: 3-263 [1, 3, 256] --
375
+ │ │ └─Linear: 3-264 [1, 3, 256] (recursive)
376
+ │ └─AttentionOperator: 2-201 -- (recursive)
377
+ │ │ └─ModuleList: 3-500 -- (recursive)
378
+ │ │ └─MLP: 3-266 [1, 3, 3] (recursive)
379
+ │ └─Sequential: 2-200 -- (recursive)
380
+ │ │ └─Linear: 3-267 [1, 3, 256] (recursive)
381
+ │ │ └─ReLU: 3-268 [1, 3, 256] --
382
+ │ │ └─Linear: 3-269 [1, 3, 256] (recursive)
383
+ │ └─AttentionOperator: 2-201 -- (recursive)
384
+ │ │ └─ModuleList: 3-500 -- (recursive)
385
+ │ │ └─MLP: 3-271 [1, 3, 3] (recursive)
386
+ │ └─Sequential: 2-200 -- (recursive)
387
+ │ │ └─Linear: 3-272 [1, 3, 256] (recursive)
388
+ │ │ └─ReLU: 3-273 [1, 3, 256] --
389
+ │ │ └─Linear: 3-274 [1, 3, 256] (recursive)
390
+ │ └─AttentionOperator: 2-201 -- (recursive)
391
+ │ │ └─ModuleList: 3-500 -- (recursive)
392
+ │ │ └─MLP: 3-276 [1, 3, 3] (recursive)
393
+ │ └─Sequential: 2-200 -- (recursive)
394
+ │ │ └─Linear: 3-277 [1, 3, 256] (recursive)
395
+ │ │ └─ReLU: 3-278 [1, 3, 256] --
396
+ │ │ └─Linear: 3-279 [1, 3, 256] (recursive)
397
+ │ └─AttentionOperator: 2-201 -- (recursive)
398
+ │ │ └─ModuleList: 3-500 -- (recursive)
399
+ │ │ └─MLP: 3-281 [1, 3, 3] (recursive)
400
+ │ └─Sequential: 2-200 -- (recursive)
401
+ │ │ └─Linear: 3-282 [1, 3, 256] (recursive)
402
+ │ │ └─ReLU: 3-283 [1, 3, 256] --
403
+ │ │ └─Linear: 3-284 [1, 3, 256] (recursive)
404
+ │ └─AttentionOperator: 2-201 -- (recursive)
405
+ │ │ └─ModuleList: 3-500 -- (recursive)
406
+ │ │ └─MLP: 3-286 [1, 3, 3] (recursive)
407
+ │ └─Sequential: 2-200 -- (recursive)
408
+ │ │ └─Linear: 3-287 [1, 3, 256] (recursive)
409
+ │ │ └─ReLU: 3-288 [1, 3, 256] --
410
+ │ │ └─Linear: 3-289 [1, 3, 256] (recursive)
411
+ │ └─AttentionOperator: 2-201 -- (recursive)
412
+ │ │ └─ModuleList: 3-500 -- (recursive)
413
+ │ │ └─MLP: 3-291 [1, 3, 3] (recursive)
414
+ │ └─Sequential: 2-200 -- (recursive)
415
+ │ │ └─Linear: 3-292 [1, 3, 256] (recursive)
416
+ │ │ └─ReLU: 3-293 [1, 3, 256] --
417
+ │ │ └─Linear: 3-294 [1, 3, 256] (recursive)
418
+ │ └─AttentionOperator: 2-201 -- (recursive)
419
+ │ │ └─ModuleList: 3-500 -- (recursive)
420
+ │ │ └─MLP: 3-296 [1, 3, 3] (recursive)
421
+ │ └─Sequential: 2-200 -- (recursive)
422
+ │ │ └─Linear: 3-297 [1, 3, 256] (recursive)
423
+ │ │ └─ReLU: 3-298 [1, 3, 256] --
424
+ │ │ └─Linear: 3-299 [1, 3, 256] (recursive)
425
+ │ └─AttentionOperator: 2-201 -- (recursive)
426
+ │ │ └─ModuleList: 3-500 -- (recursive)
427
+ │ │ └─MLP: 3-301 [1, 3, 3] (recursive)
428
+ │ └─Sequential: 2-200 -- (recursive)
429
+ │ │ └─Linear: 3-302 [1, 3, 256] (recursive)
430
+ │ │ └─ReLU: 3-303 [1, 3, 256] --
431
+ │ │ └─Linear: 3-304 [1, 3, 256] (recursive)
432
+ │ └─AttentionOperator: 2-201 -- (recursive)
433
+ │ │ └─ModuleList: 3-500 -- (recursive)
434
+ │ │ └─MLP: 3-306 [1, 3, 3] (recursive)
435
+ │ └─Sequential: 2-200 -- (recursive)
436
+ │ │ └─Linear: 3-307 [1, 3, 256] (recursive)
437
+ │ │ └─ReLU: 3-308 [1, 3, 256] --
438
+ │ │ └─Linear: 3-309 [1, 3, 256] (recursive)
439
+ │ └─AttentionOperator: 2-201 -- (recursive)
440
+ │ │ └─ModuleList: 3-500 -- (recursive)
441
+ │ │ └─MLP: 3-311 [1, 3, 3] (recursive)
442
+ │ └─Sequential: 2-200 -- (recursive)
443
+ │ │ └─Linear: 3-312 [1, 3, 256] (recursive)
444
+ │ │ └─ReLU: 3-313 [1, 3, 256] --
445
+ │ │ └─Linear: 3-314 [1, 3, 256] (recursive)
446
+ │ └─AttentionOperator: 2-201 -- (recursive)
447
+ │ │ └─ModuleList: 3-500 -- (recursive)
448
+ │ │ └─MLP: 3-316 [1, 3, 3] (recursive)
449
+ │ └─Sequential: 2-200 -- (recursive)
450
+ │ │ └─Linear: 3-317 [1, 3, 256] (recursive)
451
+ │ │ └─ReLU: 3-318 [1, 3, 256] --
452
+ │ │ └─Linear: 3-319 [1, 3, 256] (recursive)
453
+ │ └─AttentionOperator: 2-201 -- (recursive)
454
+ │ │ └─ModuleList: 3-500 -- (recursive)
455
+ │ │ └─MLP: 3-321 [1, 3, 3] (recursive)
456
+ │ └─Sequential: 2-200 -- (recursive)
457
+ │ │ └─Linear: 3-322 [1, 3, 256] (recursive)
458
+ │ │ └─ReLU: 3-323 [1, 3, 256] --
459
+ │ │ └─Linear: 3-324 [1, 3, 256] (recursive)
460
+ │ └─AttentionOperator: 2-201 -- (recursive)
461
+ │ │ └─ModuleList: 3-500 -- (recursive)
462
+ │ │ └─MLP: 3-326 [1, 3, 3] (recursive)
463
+ │ └─Sequential: 2-200 -- (recursive)
464
+ │ │ └─Linear: 3-327 [1, 3, 256] (recursive)
465
+ │ │ └─ReLU: 3-328 [1, 3, 256] --
466
+ │ │ └─Linear: 3-329 [1, 3, 256] (recursive)
467
+ │ └─AttentionOperator: 2-201 -- (recursive)
468
+ │ │ └─ModuleList: 3-500 -- (recursive)
469
+ │ │ └─MLP: 3-331 [1, 3, 3] (recursive)
470
+ │ └─Sequential: 2-200 -- (recursive)
471
+ │ │ └─Linear: 3-332 [1, 3, 256] (recursive)
472
+ │ │ └─ReLU: 3-333 [1, 3, 256] --
473
+ │ │ └─Linear: 3-334 [1, 3, 256] (recursive)
474
+ │ └─AttentionOperator: 2-201 -- (recursive)
475
+ │ │ └─ModuleList: 3-500 -- (recursive)
476
+ │ │ └─MLP: 3-336 [1, 3, 3] (recursive)
477
+ │ └─Sequential: 2-200 -- (recursive)
478
+ │ │ └─Linear: 3-337 [1, 3, 256] (recursive)
479
+ │ │ └─ReLU: 3-338 [1, 3, 256] --
480
+ │ │ └─Linear: 3-339 [1, 3, 256] (recursive)
481
+ │ └─AttentionOperator: 2-201 -- (recursive)
482
+ │ │ └─ModuleList: 3-500 -- (recursive)
483
+ │ │ └─MLP: 3-341 [1, 3, 3] (recursive)
484
+ │ └─Sequential: 2-200 -- (recursive)
485
+ │ │ └─Linear: 3-342 [1, 3, 256] (recursive)
486
+ │ │ └─ReLU: 3-343 [1, 3, 256] --
487
+ │ │ └─Linear: 3-344 [1, 3, 256] (recursive)
488
+ │ └─AttentionOperator: 2-201 -- (recursive)
489
+ │ │ └─ModuleList: 3-500 -- (recursive)
490
+ │ │ └─MLP: 3-346 [1, 3, 3] (recursive)
491
+ │ └─Sequential: 2-200 -- (recursive)
492
+ │ │ └─Linear: 3-347 [1, 3, 256] (recursive)
493
+ │ │ └─ReLU: 3-348 [1, 3, 256] --
494
+ │ │ └─Linear: 3-349 [1, 3, 256] (recursive)
495
+ │ └─AttentionOperator: 2-201 -- (recursive)
496
+ │ │ └─ModuleList: 3-500 -- (recursive)
497
+ │ │ └─MLP: 3-351 [1, 3, 3] (recursive)
498
+ │ └─Sequential: 2-200 -- (recursive)
499
+ │ │ └─Linear: 3-352 [1, 3, 256] (recursive)
500
+ │ │ └─ReLU: 3-353 [1, 3, 256] --
501
+ │ │ └─Linear: 3-354 [1, 3, 256] (recursive)
502
+ │ └─AttentionOperator: 2-201 -- (recursive)
503
+ │ │ └─ModuleList: 3-500 -- (recursive)
504
+ │ │ └─MLP: 3-356 [1, 3, 3] (recursive)
505
+ │ └─Sequential: 2-200 -- (recursive)
506
+ │ │ └─Linear: 3-357 [1, 3, 256] (recursive)
507
+ │ │ └─ReLU: 3-358 [1, 3, 256] --
508
+ │ │ └─Linear: 3-359 [1, 3, 256] (recursive)
509
+ │ └─AttentionOperator: 2-201 -- (recursive)
510
+ │ │ └─ModuleList: 3-500 -- (recursive)
511
+ │ │ └─MLP: 3-361 [1, 3, 3] (recursive)
512
+ │ └─Sequential: 2-200 -- (recursive)
513
+ │ │ └─Linear: 3-362 [1, 3, 256] (recursive)
514
+ │ │ └─ReLU: 3-363 [1, 3, 256] --
515
+ │ │ └─Linear: 3-364 [1, 3, 256] (recursive)
516
+ │ └─AttentionOperator: 2-201 -- (recursive)
517
+ │ │ └─ModuleList: 3-500 -- (recursive)
518
+ │ │ └─MLP: 3-366 [1, 3, 3] (recursive)
519
+ │ └─Sequential: 2-200 -- (recursive)
520
+ │ │ └─Linear: 3-367 [1, 3, 256] (recursive)
521
+ │ │ └─ReLU: 3-368 [1, 3, 256] --
522
+ │ │ └─Linear: 3-369 [1, 3, 256] (recursive)
523
+ │ └─AttentionOperator: 2-201 -- (recursive)
524
+ │ │ └─ModuleList: 3-500 -- (recursive)
525
+ │ │ └─MLP: 3-371 [1, 3, 3] (recursive)
526
+ │ └─Sequential: 2-200 -- (recursive)
527
+ │ │ └─Linear: 3-372 [1, 3, 256] (recursive)
528
+ │ │ └─ReLU: 3-373 [1, 3, 256] --
529
+ │ │ └─Linear: 3-374 [1, 3, 256] (recursive)
530
+ │ └─AttentionOperator: 2-201 -- (recursive)
531
+ │ │ └─ModuleList: 3-500 -- (recursive)
532
+ │ │ └─MLP: 3-376 [1, 3, 3] (recursive)
533
+ │ └─Sequential: 2-200 -- (recursive)
534
+ │ │ └─Linear: 3-377 [1, 3, 256] (recursive)
535
+ │ │ └─ReLU: 3-378 [1, 3, 256] --
536
+ │ │ └─Linear: 3-379 [1, 3, 256] (recursive)
537
+ │ └─AttentionOperator: 2-201 -- (recursive)
538
+ │ │ └─ModuleList: 3-500 -- (recursive)
539
+ │ │ └─MLP: 3-381 [1, 3, 3] (recursive)
540
+ │ └─Sequential: 2-200 -- (recursive)
541
+ │ │ └─Linear: 3-382 [1, 3, 256] (recursive)
542
+ │ │ └─ReLU: 3-383 [1, 3, 256] --
543
+ │ │ └─Linear: 3-384 [1, 3, 256] (recursive)
544
+ │ └─AttentionOperator: 2-201 -- (recursive)
545
+ │ │ └─ModuleList: 3-500 -- (recursive)
546
+ │ │ └─MLP: 3-386 [1, 3, 3] (recursive)
547
+ │ └─Sequential: 2-200 -- (recursive)
548
+ │ │ └─Linear: 3-387 [1, 3, 256] (recursive)
549
+ │ │ └─ReLU: 3-388 [1, 3, 256] --
550
+ │ │ └─Linear: 3-389 [1, 3, 256] (recursive)
551
+ │ └─AttentionOperator: 2-201 -- (recursive)
552
+ │ │ └─ModuleList: 3-500 -- (recursive)
553
+ │ │ └─MLP: 3-391 [1, 3, 3] (recursive)
554
+ │ └─Sequential: 2-200 -- (recursive)
555
+ │ │ └─Linear: 3-392 [1, 3, 256] (recursive)
556
+ │ │ └─ReLU: 3-393 [1, 3, 256] --
557
+ │ │ └─Linear: 3-394 [1, 3, 256] (recursive)
558
+ │ └─AttentionOperator: 2-201 -- (recursive)
559
+ │ │ └─ModuleList: 3-500 -- (recursive)
560
+ │ │ └─MLP: 3-396 [1, 3, 3] (recursive)
561
+ │ └─Sequential: 2-200 -- (recursive)
562
+ │ │ └─Linear: 3-397 [1, 3, 256] (recursive)
563
+ │ │ └─ReLU: 3-398 [1, 3, 256] --
564
+ │ │ └─Linear: 3-399 [1, 3, 256] (recursive)
565
+ │ └─AttentionOperator: 2-201 -- (recursive)
566
+ │ │ └─ModuleList: 3-500 -- (recursive)
567
+ │ │ └─MLP: 3-401 [1, 3, 3] (recursive)
568
+ │ └─Sequential: 2-200 -- (recursive)
569
+ │ │ └─Linear: 3-402 [1, 3, 256] (recursive)
570
+ │ │ └─ReLU: 3-403 [1, 3, 256] --
571
+ │ │ └─Linear: 3-404 [1, 3, 256] (recursive)
572
+ │ └─AttentionOperator: 2-201 -- (recursive)
573
+ │ │ └─ModuleList: 3-500 -- (recursive)
574
+ │ │ └─MLP: 3-406 [1, 3, 3] (recursive)
575
+ │ └─Sequential: 2-200 -- (recursive)
576
+ │ │ └─Linear: 3-407 [1, 3, 256] (recursive)
577
+ │ │ └─ReLU: 3-408 [1, 3, 256] --
578
+ │ │ └─Linear: 3-409 [1, 3, 256] (recursive)
579
+ │ └─AttentionOperator: 2-201 -- (recursive)
580
+ │ │ └─ModuleList: 3-500 -- (recursive)
581
+ │ │ └─MLP: 3-411 [1, 3, 3] (recursive)
582
+ │ └─Sequential: 2-200 -- (recursive)
583
+ │ │ └─Linear: 3-412 [1, 3, 256] (recursive)
584
+ │ │ └─ReLU: 3-413 [1, 3, 256] --
585
+ │ │ └─Linear: 3-414 [1, 3, 256] (recursive)
586
+ │ └─AttentionOperator: 2-201 -- (recursive)
587
+ │ │ └─ModuleList: 3-500 -- (recursive)
588
+ │ │ └─MLP: 3-416 [1, 3, 3] (recursive)
589
+ │ └─Sequential: 2-200 -- (recursive)
590
+ │ │ └─Linear: 3-417 [1, 3, 256] (recursive)
591
+ │ │ └─ReLU: 3-418 [1, 3, 256] --
592
+ │ │ └─Linear: 3-419 [1, 3, 256] (recursive)
593
+ │ └─AttentionOperator: 2-201 -- (recursive)
594
+ │ │ └─ModuleList: 3-500 -- (recursive)
595
+ │ │ └─MLP: 3-421 [1, 3, 3] (recursive)
596
+ │ └─Sequential: 2-200 -- (recursive)
597
+ │ │ └─Linear: 3-422 [1, 3, 256] (recursive)
598
+ │ │ └─ReLU: 3-423 [1, 3, 256] --
599
+ │ │ └─Linear: 3-424 [1, 3, 256] (recursive)
600
+ │ └─AttentionOperator: 2-201 -- (recursive)
601
+ │ │ └─ModuleList: 3-500 -- (recursive)
602
+ │ │ └─MLP: 3-426 [1, 3, 3] (recursive)
603
+ │ └─Sequential: 2-200 -- (recursive)
604
+ │ │ └─Linear: 3-427 [1, 3, 256] (recursive)
605
+ │ │ └─ReLU: 3-428 [1, 3, 256] --
606
+ │ │ └─Linear: 3-429 [1, 3, 256] (recursive)
607
+ │ └─AttentionOperator: 2-201 -- (recursive)
608
+ │ │ └─ModuleList: 3-500 -- (recursive)
609
+ │ │ └─MLP: 3-431 [1, 3, 3] (recursive)
610
+ │ └─Sequential: 2-200 -- (recursive)
611
+ │ │ └─Linear: 3-432 [1, 3, 256] (recursive)
612
+ │ │ └─ReLU: 3-433 [1, 3, 256] --
613
+ │ │ └─Linear: 3-434 [1, 3, 256] (recursive)
614
+ │ └─AttentionOperator: 2-201 -- (recursive)
615
+ │ │ └─ModuleList: 3-500 -- (recursive)
616
+ │ │ └─MLP: 3-436 [1, 3, 3] (recursive)
617
+ │ └─Sequential: 2-200 -- (recursive)
618
+ │ │ └─Linear: 3-437 [1, 3, 256] (recursive)
619
+ │ │ └─ReLU: 3-438 [1, 3, 256] --
620
+ │ │ └─Linear: 3-439 [1, 3, 256] (recursive)
621
+ │ └─AttentionOperator: 2-201 -- (recursive)
622
+ │ │ └─ModuleList: 3-500 -- (recursive)
623
+ │ │ └─MLP: 3-441 [1, 3, 3] (recursive)
624
+ │ └─Sequential: 2-200 -- (recursive)
625
+ │ │ └─Linear: 3-442 [1, 3, 256] (recursive)
626
+ │ │ └─ReLU: 3-443 [1, 3, 256] --
627
+ │ │ └─Linear: 3-444 [1, 3, 256] (recursive)
628
+ │ └─AttentionOperator: 2-201 -- (recursive)
629
+ │ │ └─ModuleList: 3-500 -- (recursive)
630
+ │ │ └─MLP: 3-446 [1, 3, 3] (recursive)
631
+ │ └─Sequential: 2-200 -- (recursive)
632
+ │ │ └─Linear: 3-447 [1, 3, 256] (recursive)
633
+ │ │ └─ReLU: 3-448 [1, 3, 256] --
634
+ │ │ └─Linear: 3-449 [1, 3, 256] (recursive)
635
+ │ └─AttentionOperator: 2-201 -- (recursive)
636
+ │ │ └─ModuleList: 3-500 -- (recursive)
637
+ │ │ └─MLP: 3-451 [1, 3, 3] (recursive)
638
+ │ └─Sequential: 2-200 -- (recursive)
639
+ │ │ └─Linear: 3-452 [1, 3, 256] (recursive)
640
+ │ │ └─ReLU: 3-453 [1, 3, 256] --
641
+ │ │ └─Linear: 3-454 [1, 3, 256] (recursive)
642
+ │ └─AttentionOperator: 2-201 -- (recursive)
643
+ │ │ └─ModuleList: 3-500 -- (recursive)
644
+ │ │ └─MLP: 3-456 [1, 3, 3] (recursive)
645
+ │ └─Sequential: 2-200 -- (recursive)
646
+ │ │ └─Linear: 3-457 [1, 3, 256] (recursive)
647
+ │ │ └─ReLU: 3-458 [1, 3, 256] --
648
+ │ │ └─Linear: 3-459 [1, 3, 256] (recursive)
649
+ │ └─AttentionOperator: 2-201 -- (recursive)
650
+ │ │ └─ModuleList: 3-500 -- (recursive)
651
+ │ │ └─MLP: 3-461 [1, 3, 3] (recursive)
652
+ │ └─Sequential: 2-200 -- (recursive)
653
+ │ │ └─Linear: 3-462 [1, 3, 256] (recursive)
654
+ │ │ └─ReLU: 3-463 [1, 3, 256] --
655
+ │ │ └─Linear: 3-464 [1, 3, 256] (recursive)
656
+ │ └─AttentionOperator: 2-201 -- (recursive)
657
+ │ │ └─ModuleList: 3-500 -- (recursive)
658
+ │ │ └─MLP: 3-466 [1, 3, 3] (recursive)
659
+ │ └─Sequential: 2-200 -- (recursive)
660
+ │ │ └─Linear: 3-467 [1, 3, 256] (recursive)
661
+ │ │ └─ReLU: 3-468 [1, 3, 256] --
662
+ │ │ └─Linear: 3-469 [1, 3, 256] (recursive)
663
+ │ └─AttentionOperator: 2-201 -- (recursive)
664
+ │ │ └─ModuleList: 3-500 -- (recursive)
665
+ │ │ └─MLP: 3-471 [1, 3, 3] (recursive)
666
+ │ └─Sequential: 2-200 -- (recursive)
667
+ │ │ └─Linear: 3-472 [1, 3, 256] (recursive)
668
+ │ │ └─ReLU: 3-473 [1, 3, 256] --
669
+ │ │ └─Linear: 3-474 [1, 3, 256] (recursive)
670
+ │ └─AttentionOperator: 2-201 -- (recursive)
671
+ │ │ └─ModuleList: 3-500 -- (recursive)
672
+ │ │ └─MLP: 3-476 [1, 3, 3] (recursive)
673
+ │ └─Sequential: 2-200 -- (recursive)
674
+ │ │ └─Linear: 3-477 [1, 3, 256] (recursive)
675
+ │ │ └─ReLU: 3-478 [1, 3, 256] --
676
+ │ │ └─Linear: 3-479 [1, 3, 256] (recursive)
677
+ │ └─AttentionOperator: 2-201 -- (recursive)
678
+ │ │ └─ModuleList: 3-500 -- (recursive)
679
+ │ │ └─MLP: 3-481 [1, 3, 3] (recursive)
680
+ │ └─Sequential: 2-200 -- (recursive)
681
+ │ │ └─Linear: 3-482 [1, 3, 256] (recursive)
682
+ │ │ └─ReLU: 3-483 [1, 3, 256] --
683
+ │ │ └─Linear: 3-484 [1, 3, 256] (recursive)
684
+ │ └─AttentionOperator: 2-201 -- (recursive)
685
+ │ │ └─ModuleList: 3-500 -- (recursive)
686
+ │ │ └─MLP: 3-486 [1, 3, 3] (recursive)
687
+ │ └─Sequential: 2-200 -- (recursive)
688
+ │ │ └─Linear: 3-487 [1, 3, 256] (recursive)
689
+ │ │ └─ReLU: 3-488 [1, 3, 256] --
690
+ │ │ └─Linear: 3-489 [1, 3, 256] (recursive)
691
+ │ └─AttentionOperator: 2-201 -- (recursive)
692
+ │ │ └─ModuleList: 3-500 -- (recursive)
693
+ │ │ └─MLP: 3-491 [1, 3, 3] (recursive)
694
+ │ └─Sequential: 2-200 -- (recursive)
695
+ │ │ └─Linear: 3-492 [1, 3, 256] (recursive)
696
+ │ │ └─ReLU: 3-493 [1, 3, 256] --
697
+ │ │ └─Linear: 3-494 [1, 3, 256] (recursive)
698
+ │ └─AttentionOperator: 2-201 -- (recursive)
699
+ │ │ └─ModuleList: 3-500 -- (recursive)
700
+ │ │ └─MLP: 3-496 [1, 3, 3] (recursive)
701
+ │ └─Sequential: 2-200 -- (recursive)
702
+ │ │ └─Linear: 3-497 [1, 3, 256] (recursive)
703
+ │ │ └─ReLU: 3-498 [1, 3, 256] --
704
+ │ │ └─Linear: 3-499 [1, 3, 256] (recursive)
705
+ │ └─AttentionOperator: 2-201 -- (recursive)
706
+ │ │ └─ModuleList: 3-500 -- (recursive)
707
+ │ │ └─MLP: 3-501 [1, 3, 3] (recursive)
708
+ ==============================================================================================================
709
+ Total params: 12,968,068
710
+ Trainable params: 12,968,068
711
+ Non-trainable params: 0
712
+ Total mult-adds (Units.MEGABYTES): 653.30
713
+ ==============================================================================================================
714
+ Input size (MB): 0.00
715
+ Forward/backward pass size (MB): 209.47
716
+ Params size (MB): 32.39
717
+ Estimated Total Size (MB): 241.86
718
+ ==============================================================================================================
vdp1/vdp1_01-29-0707/train_parameters.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ batch_size:
3
+ test: 1
4
+ train: 1
5
+ validation: 1
6
+ data_dirs:
7
+ test: !!python/tuple []
8
+ train: !!python/tuple
9
+ - experiments/odeon/vdp1/data_gpode
10
+ validation: !!python/tuple
11
+ - experiments/odeon/vdp1/data_gpode
12
+ dataset_name:
13
+ test: HeterogeneousFIMSDEDataset
14
+ train: StreamingFIMSDEDataset
15
+ validation: StreamingFIMSDEDataset
16
+ files_to_load:
17
+ locations: locations.h5
18
+ obs_mask: obs_mask.h5
19
+ obs_times: obs_times.h5
20
+ obs_values: obs_values.h5
21
+ max_dim: 3
22
+ name: FIMSDEDataloaderIterableDataset
23
+ num_locations:
24
+ test: null
25
+ train: null
26
+ validation: null
27
+ num_observations:
28
+ test: null
29
+ train: null
30
+ validation: null
31
+ num_workers:
32
+ test: 0
33
+ train: 1
34
+ validation: 1
35
+ shard:
36
+ test: false
37
+ train: false
38
+ validation: false
39
+ shuffle_elements: true
40
+ shuffle_locations:
41
+ test: false
42
+ train: false
43
+ validation: false
44
+ shuffle_paths: true
45
+ distributed:
46
+ activation_chekpoint: false
47
+ checkpoint_type: full_state
48
+ enabled: false
49
+ min_num_params: 1e5
50
+ sharding_strategy: NO_SHARD
51
+ wrap_policy: SIZE_BAZED
52
+ experiment:
53
+ device_map: auto
54
+ name: vdp1
55
+ name_add_date: true
56
+ seed: 10
57
+ model:
58
+ model_config:
59
+ attention_map: softmax
60
+ attention_method: linear
61
+ dim_embed: 256
62
+ dim_feedforward: 1024
63
+ dim_ffn_u_model: 1024
64
+ dim_hidden_u_model: 256
65
+ dim_max_trajectory: 3
66
+ dropout: 0.0
67
+ num_context_encoder_layers: 2
68
+ num_heads: 8
69
+ num_res_layer_u_model: 6
70
+ num_res_layers_functional_decoder: 8
71
+ use_bias_for_projection: true
72
+ use_bias_in_attention: true
73
+ use_query_residual_in_attention: true
74
+ model_type: TrainingWrapper
75
+ train_config:
76
+ corruption_model_type: null
77
+ h_max: null
78
+ ic_noise_scale: 0.0
79
+ integrator_for_trajectory_training: rk4
80
+ intermediate_steps_per_step: 1
81
+ loss_filter_nans: true
82
+ loss_type: l1
83
+ num_ic: 3
84
+ random_initial_conditions: false
85
+ step_noise_scale: 0.0
86
+ train_type: trajectory_reconstruction
87
+ train_with_normalized_head: true
88
+ traj_loss_steps: 25
89
+ use_h_max: false
90
+ optimizers: !!python/tuple
91
+ - optimizer_d:
92
+ gradient_norm_clipping: 1.0
93
+ lr: 1.0e-05
94
+ name: torch.optim.AdamW
95
+ weight_decay: 0.001
96
+ trainer:
97
+ best_metric: loss
98
+ debug_iterations: null
99
+ detect_anomaly: false
100
+ epochs: 1001
101
+ experiment_dir: results/vdp1
102
+ gradient_accumulation_steps: 1
103
+ logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
104
+ name: Trainer
105
+ precision: bf16mixed
106
+ save_every: 1
107
+ schedulers: !!python/tuple
108
+ - beta: 1.0
109
+ label: drift_loss_scale
110
+ name: fim.utils.param_scheduler.ConstantScheduler