[GHA] trainer-v4-unit-test/model-init.ipynb result notebook & reports (fallback single file upload)

Browse files

Files changed (1) hide show

actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb +298 -0

actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb ADDED Viewed

	@@ -0,0 +1,298 @@

+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "add4ffdc",
+   "metadata": {
+    "papermill": {
+     "duration": 0.002305,
+     "end_time": "2023-08-28T13:44:02.939598",
+     "exception": false,
+     "start_time": "2023-08-28T13:44:02.937293",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Model Init\n",
+    "\n",
+    "Test that the model init code runs without issues\n",
+    "\n",
+    "**L6-D512 model with**\n",
+    "- Layer count: 6\n",
+    "- Embed size: 512"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "c76a6c4a",
+   "metadata": {
+    "notebookRunGroups": {
+     "groupValue": ""
+    },
+    "papermill": {
+     "duration": 0.001664,
+     "end_time": "2023-08-28T13:44:02.943074",
+     "exception": false,
+     "start_time": "2023-08-28T13:44:02.941410",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Preparing the init model and test dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e78b4188",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-28T13:44:02.948005Z",
+     "iopub.status.busy": "2023-08-28T13:44:02.947532Z",
+     "iopub.status.idle": "2023-08-28T13:44:03.704065Z",
+     "shell.execute_reply": "2023-08-28T13:44:03.703078Z"
+    },
+    "papermill": {
+     "duration": 0.761733,
+     "end_time": "2023-08-28T13:44:03.706443",
+     "exception": false,
+     "start_time": "2023-08-28T13:44:02.944710",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# First, let's set up the various directories\n",
+    "!mkdir -p ../../model/\n",
+    "!mkdir -p ../../datapath/\n",
+    "!mkdir -p ../../checkpoint/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d49245a7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-28T13:44:03.711824Z",
+     "iopub.status.busy": "2023-08-28T13:44:03.711511Z",
+     "iopub.status.idle": "2023-08-28T13:44:13.050209Z",
+     "shell.execute_reply": "2023-08-28T13:44:13.049137Z"
+    },
+    "papermill": {
+     "duration": 9.344306,
+     "end_time": "2023-08-28T13:44:13.052848",
+     "exception": false,
+     "start_time": "2023-08-28T13:44:03.708542",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2023-08-28 13:44:08,111] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "---- Initializing model ----\r\n",
+      "No of layers: 6\r\n",
+      "Embedding size: 512\r\n",
+      "Output model path: ../model/L6-D512-neox-init.pth\r\n",
+      "Vocab size: 50277\r\n",
+      "Note: this process takes a significant time (and ram) for large models\r\n",
+      "---- ----- ----\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Detected CUDA files, patching ldflags\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/wkv_1_bf16/build.ninja...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building extension module wkv_1_bf16...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
+      "ninja: no work to do.\r\n",
+      "Loading extension module wkv_1_bf16...\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[RWKV.model]: Finished initial model load\r\n",
+      "50277 512   -0.0001 emb.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.0.att.key.weight\r\n",
+      "512   512   1.0  blocks.0.att.value.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.0.att.receptance.weight\r\n",
+      "512   512   0    blocks.0.att.output.weight\r\n",
+      "2048  512   1.0  blocks.0.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.0.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.0.ffn.value.weight\r\n",
+      "512   512   0    blocks.1.att.key.weight\r\n",
+      "512   512   1.0  blocks.1.att.value.weight\r\n",
+      "512   512   0    blocks.1.att.receptance.weight\r\n",
+      "512   512   0    blocks.1.att.output.weight\r\n",
+      "2048  512   1.0  blocks.1.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.1.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.1.ffn.value.weight\r\n",
+      "512   512   0    blocks.2.att.key.weight\r\n",
+      "512   512   1.0  blocks.2.att.value.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.2.att.receptance.weight\r\n",
+      "512   512   0    blocks.2.att.output.weight\r\n",
+      "2048  512   1.0  blocks.2.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.2.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.2.ffn.value.weight\r\n",
+      "512   512   0    blocks.3.att.key.weight\r\n",
+      "512   512   1.0  blocks.3.att.value.weight\r\n",
+      "512   512   0    blocks.3.att.receptance.weight\r\n",
+      "512   512   0    blocks.3.att.output.weight\r\n",
+      "2048  512   1.0  blocks.3.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.3.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.3.ffn.value.weight\r\n",
+      "512   512   0    blocks.4.att.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   1.0  blocks.4.att.value.weight\r\n",
+      "512   512   0    blocks.4.att.receptance.weight\r\n",
+      "512   512   0    blocks.4.att.output.weight\r\n",
+      "2048  512   1.0  blocks.4.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.4.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.4.ffn.value.weight\r\n",
+      "512   512   0    blocks.5.att.key.weight\r\n",
+      "512   512   1.0  blocks.5.att.value.weight\r\n",
+      "512   512   0    blocks.5.att.receptance.weight\r\n",
+      "512   512   0    blocks.5.att.output.weight\r\n",
+      "2048  512   1.0  blocks.5.ffn.key.weight\r\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "512   512   0    blocks.5.ffn.receptance.weight\r\n",
+      "512   2048  0    blocks.5.ffn.value.weight\r\n",
+      "50277 512   0.5  head.weight\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Let's initialize the L6-D512 model with the init_model.py code\n",
+    "!cd ../../RWKV-v4neo/ && python3 init_model.py \\\n",
+    "    --n_layer 6 --n_embd 512 \\\n",
+    "    --vocab_size neox \\\n",
+    "    --skip-if-exists --safe-init \\\n",
+    "    ../model/L6-D512-neox-init.pth"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "rwkv-exp",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 11.767337,
+   "end_time": "2023-08-28T13:44:13.477688",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
+   "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
+   "parameters": {},
+   "start_time": "2023-08-28T13:44:01.710351",
+   "version": "2.4.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}