picocreator committed on
Commit
8f6731b
1 Parent(s): 3cd4057

Delete trainer-v4-unit-test

Browse files
trainer-v4-unit-test/L6-D512-neox-init.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3d06f5cb8762098d57586d37f251cecb27464dfe16955f2c0a9da4781f1ed17
3
- size 143968613
 
 
 
 
trainer-v4-unit-test/trainer-v4-unit-test/model-init.ipynb DELETED
@@ -1,294 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "id": "f8d0f2a8",
7
- "metadata": {
8
- "papermill": {
9
- "duration": 0.001857,
10
- "end_time": "2023-08-23T10:30:31.380683",
11
- "exception": false,
12
- "start_time": "2023-08-23T10:30:31.378826",
13
- "status": "completed"
14
- },
15
- "tags": []
16
- },
17
- "source": [
18
- "# Model Init\n",
19
- "\n",
20
- "Test that the model init code, runs without issues\n",
21
- "\n",
22
- "**L6-D512 model with**\n",
23
- "- Layer count: 6\n",
24
- "- Embed size: 512"
25
- ]
26
- },
27
- {
28
- "attachments": {},
29
- "cell_type": "markdown",
30
- "id": "5877c2a8",
31
- "metadata": {
32
- "notebookRunGroups": {
33
- "groupValue": ""
34
- },
35
- "papermill": {
36
- "duration": 0.001252,
37
- "end_time": "2023-08-23T10:30:31.383486",
38
- "exception": false,
39
- "start_time": "2023-08-23T10:30:31.382234",
40
- "status": "completed"
41
- },
42
- "tags": []
43
- },
44
- "source": [
45
- "## Preparing the init model and test dataset"
46
- ]
47
- },
48
- {
49
- "cell_type": "code",
50
- "execution_count": 1,
51
- "id": "a6979aad",
52
- "metadata": {
53
- "execution": {
54
- "iopub.execute_input": "2023-08-23T10:30:31.389495Z",
55
- "iopub.status.busy": "2023-08-23T10:30:31.388151Z",
56
- "iopub.status.idle": "2023-08-23T10:30:32.146278Z",
57
- "shell.execute_reply": "2023-08-23T10:30:32.144823Z"
58
- },
59
- "papermill": {
60
- "duration": 0.76389,
61
- "end_time": "2023-08-23T10:30:32.148710",
62
- "exception": false,
63
- "start_time": "2023-08-23T10:30:31.384820",
64
- "status": "completed"
65
- },
66
- "tags": []
67
- },
68
- "outputs": [],
69
- "source": [
70
- "# First lets setup the various directories\n",
71
- "!mkdir -p ../../model/\n",
72
- "!mkdir -p ../../datapath/\n",
73
- "!mkdir -p ../../checkpoint/"
74
- ]
75
- },
76
- {
77
- "cell_type": "code",
78
- "execution_count": 2,
79
- "id": "831f03fd",
80
- "metadata": {
81
- "execution": {
82
- "iopub.execute_input": "2023-08-23T10:30:32.155121Z",
83
- "iopub.status.busy": "2023-08-23T10:30:32.153892Z",
84
- "iopub.status.idle": "2023-08-23T10:30:41.918539Z",
85
- "shell.execute_reply": "2023-08-23T10:30:41.917396Z"
86
- },
87
- "papermill": {
88
- "duration": 9.770776,
89
- "end_time": "2023-08-23T10:30:41.921168",
90
- "exception": false,
91
- "start_time": "2023-08-23T10:30:32.150392",
92
- "status": "completed"
93
- },
94
- "tags": []
95
- },
96
- "outputs": [
97
- {
98
- "name": "stdout",
99
- "output_type": "stream",
100
- "text": [
101
- "[2023-08-23 10:30:36,724] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
102
- ]
103
- },
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
109
- "---- Initializing model ----\r\n",
110
- "No of layers: 6\r\n",
111
- "Embedding size: 512\r\n",
112
- "Output model path: ../model/L6-D512-neox-init.pth\r\n",
113
- "Vocab size: 50277\r\n",
114
- "Note: this process takes a significant time (and ram) for large models\r\n",
115
- "---- ----- ----\r\n"
116
- ]
117
- },
118
- {
119
- "name": "stdout",
120
- "output_type": "stream",
121
- "text": [
122
- "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n"
123
- ]
124
- },
125
- {
126
- "name": "stdout",
127
- "output_type": "stream",
128
- "text": [
129
- "Detected CUDA files, patching ldflags\r\n",
130
- "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/wkv_1_bf16/build.ninja...\r\n",
131
- "Building extension module wkv_1_bf16...\r\n",
132
- "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
133
- ]
134
- },
135
- {
136
- "name": "stdout",
137
- "output_type": "stream",
138
- "text": [
139
- "ninja: no work to do.\r\n",
140
- "Loading extension module wkv_1_bf16...\r\n"
141
- ]
142
- },
143
- {
144
- "name": "stdout",
145
- "output_type": "stream",
146
- "text": [
147
- "[RWKV.model]: Finished initial model load\r\n",
148
- "50277 512 -0.0001 emb.weight\r\n"
149
- ]
150
- },
151
- {
152
- "name": "stdout",
153
- "output_type": "stream",
154
- "text": [
155
- "512 512 0 blocks.0.att.key.weight\r\n",
156
- "512 512 1.0 blocks.0.att.value.weight\r\n",
157
- "512 512 0 blocks.0.att.receptance.weight\r\n",
158
- "512 512 0 blocks.0.att.output.weight\r\n",
159
- "2048 512 1.0 blocks.0.ffn.key.weight\r\n"
160
- ]
161
- },
162
- {
163
- "name": "stdout",
164
- "output_type": "stream",
165
- "text": [
166
- "512 512 0 blocks.0.ffn.receptance.weight\r\n",
167
- "512 2048 0 blocks.0.ffn.value.weight\r\n",
168
- "512 512 0 blocks.1.att.key.weight\r\n",
169
- "512 512 1.0 blocks.1.att.value.weight\r\n"
170
- ]
171
- },
172
- {
173
- "name": "stdout",
174
- "output_type": "stream",
175
- "text": [
176
- "512 512 0 blocks.1.att.receptance.weight\r\n",
177
- "512 512 0 blocks.1.att.output.weight\r\n",
178
- "2048 512 1.0 blocks.1.ffn.key.weight\r\n"
179
- ]
180
- },
181
- {
182
- "name": "stdout",
183
- "output_type": "stream",
184
- "text": [
185
- "512 512 0 blocks.1.ffn.receptance.weight\r\n",
186
- "512 2048 0 blocks.1.ffn.value.weight\r\n",
187
- "512 512 0 blocks.2.att.key.weight\r\n",
188
- "512 512 1.0 blocks.2.att.value.weight\r\n",
189
- "512 512 0 blocks.2.att.receptance.weight\r\n",
190
- "512 512 0 blocks.2.att.output.weight\r\n",
191
- "2048 512 1.0 blocks.2.ffn.key.weight\r\n"
192
- ]
193
- },
194
- {
195
- "name": "stdout",
196
- "output_type": "stream",
197
- "text": [
198
- "512 512 0 blocks.2.ffn.receptance.weight\r\n",
199
- "512 2048 0 blocks.2.ffn.value.weight\r\n",
200
- "512 512 0 blocks.3.att.key.weight\r\n",
201
- "512 512 1.0 blocks.3.att.value.weight\r\n"
202
- ]
203
- },
204
- {
205
- "name": "stdout",
206
- "output_type": "stream",
207
- "text": [
208
- "512 512 0 blocks.3.att.receptance.weight\r\n",
209
- "512 512 0 blocks.3.att.output.weight\r\n",
210
- "2048 512 1.0 blocks.3.ffn.key.weight\r\n",
211
- "512 512 0 blocks.3.ffn.receptance.weight\r\n",
212
- "512 2048 0 blocks.3.ffn.value.weight\r\n"
213
- ]
214
- },
215
- {
216
- "name": "stdout",
217
- "output_type": "stream",
218
- "text": [
219
- "512 512 0 blocks.4.att.key.weight\r\n",
220
- "512 512 1.0 blocks.4.att.value.weight\r\n",
221
- "512 512 0 blocks.4.att.receptance.weight\r\n",
222
- "512 512 0 blocks.4.att.output.weight\r\n",
223
- "2048 512 1.0 blocks.4.ffn.key.weight\r\n"
224
- ]
225
- },
226
- {
227
- "name": "stdout",
228
- "output_type": "stream",
229
- "text": [
230
- "512 512 0 blocks.4.ffn.receptance.weight\r\n",
231
- "512 2048 0 blocks.4.ffn.value.weight\r\n",
232
- "512 512 0 blocks.5.att.key.weight\r\n",
233
- "512 512 1.0 blocks.5.att.value.weight\r\n",
234
- "512 512 0 blocks.5.att.receptance.weight\r\n",
235
- "512 512 0 blocks.5.att.output.weight\r\n"
236
- ]
237
- },
238
- {
239
- "name": "stdout",
240
- "output_type": "stream",
241
- "text": [
242
- "2048 512 1.0 blocks.5.ffn.key.weight\r\n",
243
- "512 512 0 blocks.5.ffn.receptance.weight\r\n",
244
- "512 2048 0 blocks.5.ffn.value.weight\r\n"
245
- ]
246
- },
247
- {
248
- "name": "stdout",
249
- "output_type": "stream",
250
- "text": [
251
- "50277 512 0.5 head.weight\r\n"
252
- ]
253
- }
254
- ],
255
- "source": [
256
- "# Lets initialized the L6-D512 model with the init_model.py code\n",
257
- "!cd ../../RWKV-v4neo/ && python3 init_model.py --n_layer 6 --n_embd 512 --vocab_size neox ../model/L6-D512-neox-init.pth"
258
- ]
259
- }
260
- ],
261
- "metadata": {
262
- "kernelspec": {
263
- "display_name": "rwkv-exp",
264
- "language": "python",
265
- "name": "python3"
266
- },
267
- "language_info": {
268
- "codemirror_mode": {
269
- "name": "ipython",
270
- "version": 3
271
- },
272
- "file_extension": ".py",
273
- "mimetype": "text/x-python",
274
- "name": "python",
275
- "nbconvert_exporter": "python",
276
- "pygments_lexer": "ipython3",
277
- "version": "3.10.12"
278
- },
279
- "papermill": {
280
- "default_parameters": {},
281
- "duration": 12.324741,
282
- "end_time": "2023-08-23T10:30:42.347605",
283
- "environment_variables": {},
284
- "exception": null,
285
- "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
286
- "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
287
- "parameters": {},
288
- "start_time": "2023-08-23T10:30:30.022864",
289
- "version": "2.4.0"
290
- }
291
- },
292
- "nbformat": 4,
293
- "nbformat_minor": 5
294
- }