Export LightGPT small at 150 epochs

Files changed (5) hide show

README.md +2 -2
export_model.ipynb +16 -19
exports/lightgpt-small.onnx +3 -0
exports/lightgpt-small.safetensors +3 -0
requirements.txt +2 -2

README.md CHANGED Viewed

@@ -9,11 +9,11 @@ metrics:
 - perplexity
 pipeline_tag: text-generation
 tags:
-- LightGPT
 ---
 # LightGPT
-LightGPT is a lightweight generative pretrained Transformer (GPT) model for the people! Built using PyTorch and trained on the Fineweb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
 ## Features

 - perplexity
 pipeline_tag: text-generation
 tags:
+- NoPE
 ---
 # LightGPT
+LightGPT is a lightweight generative pretrained Transformer (GPT) language model for the people! Built using PyTorch and trained on the FineWeb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
 ## Features

export_model.ipynb CHANGED Viewed

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -64,7 +64,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -93,7 +93,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -125,23 +125,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/andrew/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/onnx/_internal/_exporter_legacy.py:116: UserWarning: torch.onnx.dynamo_export only implements opset version 18 for now. If you need to use a different opset version, please register them with register_custom_op.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Applied 72 of general pattern rewrite rules.\n",
-      "Model saved to ./exports/lightgpt-small.onnx\n"
      ]
     }
    ],
@@ -150,7 +147,7 @@
     "\n",
     "from torch.onnx import dynamo_export, ExportOptions\n",
     "\n",
-    "example_input = torch.randint(0, model.vocabulary_size - 1, (1, model.block_size))\n",
     "\n",
     "model = ONNXModel(model)  # Nicer inferencing API\n",
     "\n",

   },
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
+     "ename": "AttributeError",
+     "evalue": "'GPT' object has no attribute 'block_size'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[5], line 5\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ONNXModel\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01monnx\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dynamo_export, ExportOptions\n\u001b[0;32m----> 5\u001b[0m example_input \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m0\u001b[39m, model\u001b[38;5;241m.\u001b[39mvocabulary_size \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m, (\u001b[38;5;241m1\u001b[39m, \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_size\u001b[49m))\n\u001b[1;32m      7\u001b[0m model \u001b[38;5;241m=\u001b[39m ONNXModel(model)  \u001b[38;5;66;03m# Nicer inferencing API\u001b[39;00m\n\u001b[1;32m      9\u001b[0m model\u001b[38;5;241m.\u001b[39meval()  \u001b[38;5;66;03m# Turn off dropout and other train-time operations\u001b[39;00m\n",
+      "File \u001b[0;32m~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:220\u001b[0m, in \u001b[0;36mOptimizedModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m    218\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_orig_mod\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    219\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_modules[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_orig_mod\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 220\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_orig_mod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1931\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   1929\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m   1930\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1931\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[1;32m   1932\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1933\u001b[0m )\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'GPT' object has no attribute 'block_size'"
      ]
     }
    ],
     "\n",
     "from torch.onnx import dynamo_export, ExportOptions\n",
     "\n",
+    "example_input = torch.randint(0, model.vocabulary_size - 1, (1, 1024))\n",
     "\n",
     "model = ONNXModel(model)  # Nicer inferencing API\n",
     "\n",

exports/lightgpt-small.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3c5af0c48df3f5af5f7eeccd5fc25b085f74ead592ff8b1af33b76246d9792
+size 1414536976

exports/lightgpt-small.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9f6b0a2468fc8f28e7e95a0e57476cd24e36f78352df65bd1b00108d4e5265
+size 1414029160

requirements.txt CHANGED Viewed

@@ -1,12 +1,12 @@
 datasets==3.0.2
 numpy==1.26.4
 torch==2.5.1
 torchmetrics==1.5.1
-tiktoken==0.8.0
 tqdm==4.66.6
 matplotlib==3.9.2
 safetensors==0.5.2
 onnx==1.17.0
 onnxscript==0.1.0.dev20250108
 onnxruntime==1.20.1
-tensorboard==2.18.0

 datasets==3.0.2
+tiktoken==0.8.0
 numpy==1.26.4
 torch==2.5.1
 torchmetrics==1.5.1
 tqdm==4.66.6
 matplotlib==3.9.2
+tensorboard==2.18.0
 safetensors==0.5.2
 onnx==1.17.0
 onnxscript==0.1.0.dev20250108
 onnxruntime==1.20.1