picocreator committed on
Commit
b9f892f
1 Parent(s): 5c1dadb

[GHA] trainer-v4-unit-test/model-init.ipynb result notebook & reports (fallback single file upload)

Browse files
actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "id": "add4ffdc",
7
+ "metadata": {
8
+ "papermill": {
9
+ "duration": 0.002305,
10
+ "end_time": "2023-08-28T13:44:02.939598",
11
+ "exception": false,
12
+ "start_time": "2023-08-28T13:44:02.937293",
13
+ "status": "completed"
14
+ },
15
+ "tags": []
16
+ },
17
+ "source": [
18
+ "# Model Init\n",
19
+ "\n",
20
+ "Test that the model init code runs without issues\n",
21
+ "\n",
22
+ "**L6-D512 model with**\n",
23
+ "- Layer count: 6\n",
24
+ "- Embed size: 512"
25
+ ]
26
+ },
27
+ {
28
+ "attachments": {},
29
+ "cell_type": "markdown",
30
+ "id": "c76a6c4a",
31
+ "metadata": {
32
+ "notebookRunGroups": {
33
+ "groupValue": ""
34
+ },
35
+ "papermill": {
36
+ "duration": 0.001664,
37
+ "end_time": "2023-08-28T13:44:02.943074",
38
+ "exception": false,
39
+ "start_time": "2023-08-28T13:44:02.941410",
40
+ "status": "completed"
41
+ },
42
+ "tags": []
43
+ },
44
+ "source": [
45
+ "## Preparing the init model and test dataset"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 1,
51
+ "id": "e78b4188",
52
+ "metadata": {
53
+ "execution": {
54
+ "iopub.execute_input": "2023-08-28T13:44:02.948005Z",
55
+ "iopub.status.busy": "2023-08-28T13:44:02.947532Z",
56
+ "iopub.status.idle": "2023-08-28T13:44:03.704065Z",
57
+ "shell.execute_reply": "2023-08-28T13:44:03.703078Z"
58
+ },
59
+ "papermill": {
60
+ "duration": 0.761733,
61
+ "end_time": "2023-08-28T13:44:03.706443",
62
+ "exception": false,
63
+ "start_time": "2023-08-28T13:44:02.944710",
64
+ "status": "completed"
65
+ },
66
+ "tags": []
67
+ },
68
+ "outputs": [],
69
+ "source": [
70
+ "# First, let's set up the various directories\n",
71
+ "!mkdir -p ../../model/\n",
72
+ "!mkdir -p ../../datapath/\n",
73
+ "!mkdir -p ../../checkpoint/"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 2,
79
+ "id": "d49245a7",
80
+ "metadata": {
81
+ "execution": {
82
+ "iopub.execute_input": "2023-08-28T13:44:03.711824Z",
83
+ "iopub.status.busy": "2023-08-28T13:44:03.711511Z",
84
+ "iopub.status.idle": "2023-08-28T13:44:13.050209Z",
85
+ "shell.execute_reply": "2023-08-28T13:44:13.049137Z"
86
+ },
87
+ "papermill": {
88
+ "duration": 9.344306,
89
+ "end_time": "2023-08-28T13:44:13.052848",
90
+ "exception": false,
91
+ "start_time": "2023-08-28T13:44:03.708542",
92
+ "status": "completed"
93
+ },
94
+ "tags": []
95
+ },
96
+ "outputs": [
97
+ {
98
+ "name": "stdout",
99
+ "output_type": "stream",
100
+ "text": [
101
+ "[2023-08-28 13:44:08,111] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
102
+ ]
103
+ },
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
109
+ "---- Initializing model ----\r\n",
110
+ "No of layers: 6\r\n",
111
+ "Embedding size: 512\r\n",
112
+ "Output model path: ../model/L6-D512-neox-init.pth\r\n",
113
+ "Vocab size: 50277\r\n",
114
+ "Note: this process takes a significant time (and ram) for large models\r\n",
115
+ "---- ----- ----\r\n"
116
+ ]
117
+ },
118
+ {
119
+ "name": "stdout",
120
+ "output_type": "stream",
121
+ "text": [
122
+ "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n"
123
+ ]
124
+ },
125
+ {
126
+ "name": "stdout",
127
+ "output_type": "stream",
128
+ "text": [
129
+ "Detected CUDA files, patching ldflags\r\n",
130
+ "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/wkv_1_bf16/build.ninja...\r\n"
131
+ ]
132
+ },
133
+ {
134
+ "name": "stdout",
135
+ "output_type": "stream",
136
+ "text": [
137
+ "Building extension module wkv_1_bf16...\r\n",
138
+ "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
139
+ "ninja: no work to do.\r\n",
140
+ "Loading extension module wkv_1_bf16...\r\n"
141
+ ]
142
+ },
143
+ {
144
+ "name": "stdout",
145
+ "output_type": "stream",
146
+ "text": [
147
+ "[RWKV.model]: Finished initial model load\r\n",
148
+ "50277 512 -0.0001 emb.weight\r\n"
149
+ ]
150
+ },
151
+ {
152
+ "name": "stdout",
153
+ "output_type": "stream",
154
+ "text": [
155
+ "512 512 0 blocks.0.att.key.weight\r\n",
156
+ "512 512 1.0 blocks.0.att.value.weight\r\n"
157
+ ]
158
+ },
159
+ {
160
+ "name": "stdout",
161
+ "output_type": "stream",
162
+ "text": [
163
+ "512 512 0 blocks.0.att.receptance.weight\r\n",
164
+ "512 512 0 blocks.0.att.output.weight\r\n",
165
+ "2048 512 1.0 blocks.0.ffn.key.weight\r\n"
166
+ ]
167
+ },
168
+ {
169
+ "name": "stdout",
170
+ "output_type": "stream",
171
+ "text": [
172
+ "512 512 0 blocks.0.ffn.receptance.weight\r\n",
173
+ "512 2048 0 blocks.0.ffn.value.weight\r\n",
174
+ "512 512 0 blocks.1.att.key.weight\r\n",
175
+ "512 512 1.0 blocks.1.att.value.weight\r\n",
176
+ "512 512 0 blocks.1.att.receptance.weight\r\n",
177
+ "512 512 0 blocks.1.att.output.weight\r\n",
178
+ "2048 512 1.0 blocks.1.ffn.key.weight\r\n"
179
+ ]
180
+ },
181
+ {
182
+ "name": "stdout",
183
+ "output_type": "stream",
184
+ "text": [
185
+ "512 512 0 blocks.1.ffn.receptance.weight\r\n",
186
+ "512 2048 0 blocks.1.ffn.value.weight\r\n",
187
+ "512 512 0 blocks.2.att.key.weight\r\n",
188
+ "512 512 1.0 blocks.2.att.value.weight\r\n"
189
+ ]
190
+ },
191
+ {
192
+ "name": "stdout",
193
+ "output_type": "stream",
194
+ "text": [
195
+ "512 512 0 blocks.2.att.receptance.weight\r\n",
196
+ "512 512 0 blocks.2.att.output.weight\r\n",
197
+ "2048 512 1.0 blocks.2.ffn.key.weight\r\n"
198
+ ]
199
+ },
200
+ {
201
+ "name": "stdout",
202
+ "output_type": "stream",
203
+ "text": [
204
+ "512 512 0 blocks.2.ffn.receptance.weight\r\n",
205
+ "512 2048 0 blocks.2.ffn.value.weight\r\n",
206
+ "512 512 0 blocks.3.att.key.weight\r\n",
207
+ "512 512 1.0 blocks.3.att.value.weight\r\n",
208
+ "512 512 0 blocks.3.att.receptance.weight\r\n",
209
+ "512 512 0 blocks.3.att.output.weight\r\n",
210
+ "2048 512 1.0 blocks.3.ffn.key.weight\r\n"
211
+ ]
212
+ },
213
+ {
214
+ "name": "stdout",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "512 512 0 blocks.3.ffn.receptance.weight\r\n",
218
+ "512 2048 0 blocks.3.ffn.value.weight\r\n",
219
+ "512 512 0 blocks.4.att.key.weight\r\n"
220
+ ]
221
+ },
222
+ {
223
+ "name": "stdout",
224
+ "output_type": "stream",
225
+ "text": [
226
+ "512 512 1.0 blocks.4.att.value.weight\r\n",
227
+ "512 512 0 blocks.4.att.receptance.weight\r\n",
228
+ "512 512 0 blocks.4.att.output.weight\r\n",
229
+ "2048 512 1.0 blocks.4.ffn.key.weight\r\n"
230
+ ]
231
+ },
232
+ {
233
+ "name": "stdout",
234
+ "output_type": "stream",
235
+ "text": [
236
+ "512 512 0 blocks.4.ffn.receptance.weight\r\n",
237
+ "512 2048 0 blocks.4.ffn.value.weight\r\n",
238
+ "512 512 0 blocks.5.att.key.weight\r\n",
239
+ "512 512 1.0 blocks.5.att.value.weight\r\n",
240
+ "512 512 0 blocks.5.att.receptance.weight\r\n",
241
+ "512 512 0 blocks.5.att.output.weight\r\n",
242
+ "2048 512 1.0 blocks.5.ffn.key.weight\r\n"
243
+ ]
244
+ },
245
+ {
246
+ "name": "stdout",
247
+ "output_type": "stream",
248
+ "text": [
249
+ "512 512 0 blocks.5.ffn.receptance.weight\r\n",
250
+ "512 2048 0 blocks.5.ffn.value.weight\r\n",
251
+ "50277 512 0.5 head.weight\r\n"
252
+ ]
253
+ }
254
+ ],
255
+ "source": [
256
+ "# Let's initialize the L6-D512 model with the init_model.py code\n",
257
+ "!cd ../../RWKV-v4neo/ && python3 init_model.py \\\n",
258
+ " --n_layer 6 --n_embd 512 \\\n",
259
+ " --vocab_size neox \\\n",
260
+ " --skip-if-exists --safe-init \\\n",
261
+ " ../model/L6-D512-neox-init.pth"
262
+ ]
263
+ }
264
+ ],
265
+ "metadata": {
266
+ "kernelspec": {
267
+ "display_name": "rwkv-exp",
268
+ "language": "python",
269
+ "name": "python3"
270
+ },
271
+ "language_info": {
272
+ "codemirror_mode": {
273
+ "name": "ipython",
274
+ "version": 3
275
+ },
276
+ "file_extension": ".py",
277
+ "mimetype": "text/x-python",
278
+ "name": "python",
279
+ "nbconvert_exporter": "python",
280
+ "pygments_lexer": "ipython3",
281
+ "version": "3.10.12"
282
+ },
283
+ "papermill": {
284
+ "default_parameters": {},
285
+ "duration": 11.767337,
286
+ "end_time": "2023-08-28T13:44:13.477688",
287
+ "environment_variables": {},
288
+ "exception": null,
289
+ "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
290
+ "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
291
+ "parameters": {},
292
+ "start_time": "2023-08-28T13:44:01.710351",
293
+ "version": "2.4.0"
294
+ }
295
+ },
296
+ "nbformat": 4,
297
+ "nbformat_minor": 5
298
+ }