picocreator commited on
Commit
cea1c15
1 Parent(s): 8f6731b

Delete actions-runner

Browse files
actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/L6-D512-neox-init.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1e2efad3280524fa2fe7c537e467bede9c6c59e7b8095a8ea2e1e1888d26691
3
- size 143968613
 
 
 
 
actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/trainer-v4-unit-test/model-init.ipynb DELETED
@@ -1,300 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "id": "a6359476",
7
- "metadata": {
8
- "papermill": {
9
- "duration": 0.002323,
10
- "end_time": "2023-08-23T10:26:43.282956",
11
- "exception": false,
12
- "start_time": "2023-08-23T10:26:43.280633",
13
- "status": "completed"
14
- },
15
- "tags": []
16
- },
17
- "source": [
18
- "# Model Init\n",
19
- "\n",
20
- "Test that the model init code, runs without issues\n",
21
- "\n",
22
- "**L6-D512 model with**\n",
23
- "- Layer count: 6\n",
24
- "- Embed size: 512"
25
- ]
26
- },
27
- {
28
- "attachments": {},
29
- "cell_type": "markdown",
30
- "id": "6c73e486",
31
- "metadata": {
32
- "notebookRunGroups": {
33
- "groupValue": ""
34
- },
35
- "papermill": {
36
- "duration": 0.001515,
37
- "end_time": "2023-08-23T10:26:43.286466",
38
- "exception": false,
39
- "start_time": "2023-08-23T10:26:43.284951",
40
- "status": "completed"
41
- },
42
- "tags": []
43
- },
44
- "source": [
45
- "## Preparing the init model and test dataset"
46
- ]
47
- },
48
- {
49
- "cell_type": "code",
50
- "execution_count": 1,
51
- "id": "fcface89",
52
- "metadata": {
53
- "execution": {
54
- "iopub.execute_input": "2023-08-23T10:26:43.291673Z",
55
- "iopub.status.busy": "2023-08-23T10:26:43.291157Z",
56
- "iopub.status.idle": "2023-08-23T10:26:44.037056Z",
57
- "shell.execute_reply": "2023-08-23T10:26:44.036039Z"
58
- },
59
- "papermill": {
60
- "duration": 0.751285,
61
- "end_time": "2023-08-23T10:26:44.039482",
62
- "exception": false,
63
- "start_time": "2023-08-23T10:26:43.288197",
64
- "status": "completed"
65
- },
66
- "tags": []
67
- },
68
- "outputs": [],
69
- "source": [
70
- "# First lets setup the various directories\n",
71
- "!mkdir -p ../../model/\n",
72
- "!mkdir -p ../../datapath/\n",
73
- "!mkdir -p ../../checkpoint/"
74
- ]
75
- },
76
- {
77
- "cell_type": "code",
78
- "execution_count": 2,
79
- "id": "b747f284",
80
- "metadata": {
81
- "execution": {
82
- "iopub.execute_input": "2023-08-23T10:26:44.045133Z",
83
- "iopub.status.busy": "2023-08-23T10:26:44.044626Z",
84
- "iopub.status.idle": "2023-08-23T10:26:53.053696Z",
85
- "shell.execute_reply": "2023-08-23T10:26:53.052569Z"
86
- },
87
- "papermill": {
88
- "duration": 9.015161,
89
- "end_time": "2023-08-23T10:26:53.056640",
90
- "exception": false,
91
- "start_time": "2023-08-23T10:26:44.041479",
92
- "status": "completed"
93
- },
94
- "tags": []
95
- },
96
- "outputs": [
97
- {
98
- "name": "stdout",
99
- "output_type": "stream",
100
- "text": [
101
- "[2023-08-23 10:26:48,317] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
102
- ]
103
- },
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
109
- ]
110
- },
111
- {
112
- "name": "stdout",
113
- "output_type": "stream",
114
- "text": [
115
- "---- Initializing model ----\r\n",
116
- "No of layers: 6\r\n",
117
- "Embedding size: 512\r\n",
118
- "Output model path: ../model/L6-D512-neox-init.pth\r\n",
119
- "Vocab size: 50277\r\n",
120
- "Note: this process takes a significant time (and ram) for large models\r\n",
121
- "---- ----- ----\r\n"
122
- ]
123
- },
124
- {
125
- "name": "stdout",
126
- "output_type": "stream",
127
- "text": [
128
- "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n"
129
- ]
130
- },
131
- {
132
- "name": "stdout",
133
- "output_type": "stream",
134
- "text": [
135
- "Detected CUDA files, patching ldflags\r\n",
136
- "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/wkv_1_bf16/build.ninja...\r\n"
137
- ]
138
- },
139
- {
140
- "name": "stdout",
141
- "output_type": "stream",
142
- "text": [
143
- "Building extension module wkv_1_bf16...\r\n",
144
- "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
145
- "ninja: no work to do.\r\n",
146
- "Loading extension module wkv_1_bf16...\r\n"
147
- ]
148
- },
149
- {
150
- "name": "stdout",
151
- "output_type": "stream",
152
- "text": [
153
- "[RWKV.model]: Finished initial model load\r\n",
154
- "50277 512 -0.0001 emb.weight\r\n"
155
- ]
156
- },
157
- {
158
- "name": "stdout",
159
- "output_type": "stream",
160
- "text": [
161
- "512 512 0 blocks.0.att.key.weight\r\n",
162
- "512 512 1.0 blocks.0.att.value.weight\r\n",
163
- "512 512 0 blocks.0.att.receptance.weight\r\n",
164
- "512 512 0 blocks.0.att.output.weight\r\n",
165
- "2048 512 1.0 blocks.0.ffn.key.weight\r\n"
166
- ]
167
- },
168
- {
169
- "name": "stdout",
170
- "output_type": "stream",
171
- "text": [
172
- "512 512 0 blocks.0.ffn.receptance.weight\r\n",
173
- "512 2048 0 blocks.0.ffn.value.weight\r\n",
174
- "512 512 0 blocks.1.att.key.weight\r\n",
175
- "512 512 1.0 blocks.1.att.value.weight\r\n"
176
- ]
177
- },
178
- {
179
- "name": "stdout",
180
- "output_type": "stream",
181
- "text": [
182
- "512 512 0 blocks.1.att.receptance.weight\r\n",
183
- "512 512 0 blocks.1.att.output.weight\r\n",
184
- "2048 512 1.0 blocks.1.ffn.key.weight\r\n"
185
- ]
186
- },
187
- {
188
- "name": "stdout",
189
- "output_type": "stream",
190
- "text": [
191
- "512 512 0 blocks.1.ffn.receptance.weight\r\n",
192
- "512 2048 0 blocks.1.ffn.value.weight\r\n",
193
- "512 512 0 blocks.2.att.key.weight\r\n",
194
- "512 512 1.0 blocks.2.att.value.weight\r\n",
195
- "512 512 0 blocks.2.att.receptance.weight\r\n",
196
- "512 512 0 blocks.2.att.output.weight\r\n",
197
- "2048 512 1.0 blocks.2.ffn.key.weight\r\n"
198
- ]
199
- },
200
- {
201
- "name": "stdout",
202
- "output_type": "stream",
203
- "text": [
204
- "512 512 0 blocks.2.ffn.receptance.weight\r\n",
205
- "512 2048 0 blocks.2.ffn.value.weight\r\n",
206
- "512 512 0 blocks.3.att.key.weight\r\n",
207
- "512 512 1.0 blocks.3.att.value.weight\r\n"
208
- ]
209
- },
210
- {
211
- "name": "stdout",
212
- "output_type": "stream",
213
- "text": [
214
- "512 512 0 blocks.3.att.receptance.weight\r\n",
215
- "512 512 0 blocks.3.att.output.weight\r\n",
216
- "2048 512 1.0 blocks.3.ffn.key.weight\r\n"
217
- ]
218
- },
219
- {
220
- "name": "stdout",
221
- "output_type": "stream",
222
- "text": [
223
- "512 512 0 blocks.3.ffn.receptance.weight\r\n",
224
- "512 2048 0 blocks.3.ffn.value.weight\r\n",
225
- "512 512 0 blocks.4.att.key.weight\r\n",
226
- "512 512 1.0 blocks.4.att.value.weight\r\n",
227
- "512 512 0 blocks.4.att.receptance.weight\r\n",
228
- "512 512 0 blocks.4.att.output.weight\r\n",
229
- "2048 512 1.0 blocks.4.ffn.key.weight\r\n"
230
- ]
231
- },
232
- {
233
- "name": "stdout",
234
- "output_type": "stream",
235
- "text": [
236
- "512 512 0 blocks.4.ffn.receptance.weight\r\n",
237
- "512 2048 0 blocks.4.ffn.value.weight\r\n",
238
- "512 512 0 blocks.5.att.key.weight\r\n",
239
- "512 512 1.0 blocks.5.att.value.weight\r\n"
240
- ]
241
- },
242
- {
243
- "name": "stdout",
244
- "output_type": "stream",
245
- "text": [
246
- "512 512 0 blocks.5.att.receptance.weight\r\n",
247
- "512 512 0 blocks.5.att.output.weight\r\n",
248
- "2048 512 1.0 blocks.5.ffn.key.weight\r\n"
249
- ]
250
- },
251
- {
252
- "name": "stdout",
253
- "output_type": "stream",
254
- "text": [
255
- "512 512 0 blocks.5.ffn.receptance.weight\r\n",
256
- "512 2048 0 blocks.5.ffn.value.weight\r\n",
257
- "50277 512 0.5 head.weight\r\n"
258
- ]
259
- }
260
- ],
261
- "source": [
262
- "# Lets initialized the L6-D512 model with the init_model.py code\n",
263
- "!cd ../../RWKV-v4neo/ && python3 init_model.py --n_layer 6 --n_embd 512 --vocab_size neox ../model/L6-D512-neox-init.pth"
264
- ]
265
- }
266
- ],
267
- "metadata": {
268
- "kernelspec": {
269
- "display_name": "rwkv-exp",
270
- "language": "python",
271
- "name": "python3"
272
- },
273
- "language_info": {
274
- "codemirror_mode": {
275
- "name": "ipython",
276
- "version": 3
277
- },
278
- "file_extension": ".py",
279
- "mimetype": "text/x-python",
280
- "name": "python",
281
- "nbconvert_exporter": "python",
282
- "pygments_lexer": "ipython3",
283
- "version": "3.10.12"
284
- },
285
- "papermill": {
286
- "default_parameters": {},
287
- "duration": 11.337437,
288
- "end_time": "2023-08-23T10:26:53.383943",
289
- "environment_variables": {},
290
- "exception": null,
291
- "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
292
- "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
293
- "parameters": {},
294
- "start_time": "2023-08-23T10:26:42.046506",
295
- "version": "2.4.0"
296
- }
297
- },
298
- "nbformat": 4,
299
- "nbformat_minor": 5
300
- }