Andrew DalPino committed
Commit 98369b4 · 1 Parent(s): 8d59dc3

Export LightGPT small at 150 epochs

README.md CHANGED
@@ -9,11 +9,11 @@ metrics:
  - perplexity
  pipeline_tag: text-generation
  tags:
- - LightGPT
+ - NoPE
  ---
  # LightGPT
 
- LightGPT is a lightweight generative pretrained Transformer (GPT) model for the people! Built using PyTorch and trained on the Fineweb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
+ LightGPT is a lightweight generative pretrained Transformer (GPT) language model for the people! Built using PyTorch and trained on the Fineweb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
 
  ## Features
 
export_model.ipynb CHANGED
@@ -9,7 +9,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 1,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -28,7 +28,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 2,
  "metadata": {},
  "outputs": [
  {
@@ -64,7 +64,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 20,
+ "execution_count": 3,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -93,7 +93,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 21,
+ "execution_count": 4,
  "metadata": {},
  "outputs": [
  {
@@ -125,23 +125,20 @@
  },
  {
  "cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
  "metadata": {},
  "outputs": [
  {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/andrew/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/onnx/_internal/_exporter_legacy.py:116: UserWarning: torch.onnx.dynamo_export only implements opset version 18 for now. If you need to use a different opset version, please register them with register_custom_op.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Applied 72 of general pattern rewrite rules.\n",
- "Model saved to ./exports/lightgpt-small.onnx\n"
+ "ename": "AttributeError",
+ "evalue": "'GPT' object has no attribute 'block_size'",
+ "output_type": "error",
+ "traceback": [
+ "---------------------------------------------------------------------------",
+ "AttributeError                            Traceback (most recent call last)",
+ "Cell In[5], line 5\n      1 from model import ONNXModel\n      3 from torch.onnx import dynamo_export, ExportOptions\n----> 5 example_input = torch.randint(0, model.vocabulary_size - 1, (1, model.block_size))\n      7 model = ONNXModel(model) # Nicer inferencing API\n      9 model.eval() # Turn off dropout and other train-time operations\n",
+ "File ~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:220, in OptimizedModule.__getattr__(self, name)\n    218 if name == \"_orig_mod\":\n    219     return self._modules[\"_orig_mod\"]\n--> 220 return getattr(self._orig_mod, name)\n",
+ "File ~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1931, in Module.__getattr__(self, name)\n   1929 if name in modules:\n   1930     return modules[name]\n-> 1931 raise AttributeError(\n   1932     f\"'{type(self).__name__}' object has no attribute '{name}'\"\n   1933 )\n",
+ "AttributeError: 'GPT' object has no attribute 'block_size'"
  ]
  }
  ],
@@ -150,7 +147,7 @@
  "\n",
  "from torch.onnx import dynamo_export, ExportOptions\n",
  "\n",
- "example_input = torch.randint(0, model.vocabulary_size - 1, (1, model.block_size))\n",
+ "example_input = torch.randint(0, model.vocabulary_size - 1, (1, 1024))\n",
  "\n",
  "model = ONNXModel(model) # Nicer inferencing API\n",
  "\n",
exports/lightgpt-small.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f3c5af0c48df3f5af5f7eeccd5fc25b085f74ead592ff8b1af33b76246d9792
+ size 1414536976
exports/lightgpt-small.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd9f6b0a2468fc8f28e7e95a0e57476cd24e36f78352df65bd1b00108d4e5265
+ size 1414029160
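Both new files are Git LFS pointers, so the repository stores only the hash and byte size while the roughly 1.4 GB ONNX and safetensors weights live in LFS storage (fetch them with `git lfs pull`). A minimal sketch of running the exported ONNX graph with the onnxruntime version pinned below; the input/output names are read from the session rather than hard-coded, and the dummy token IDs and vocabulary bound are placeholders, not values taken from this commit:

```python
import numpy as np
import onnxruntime as ort

# Open the exported graph (run `git lfs pull` first so the real weights are present).
session = ort.InferenceSession("./exports/lightgpt-small.onnx")

# Look up the graph's actual input and output names instead of guessing them.
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Placeholder prompt of token IDs shaped (batch, sequence); real prompts would come
# from the tiktoken tokenizer used during training. 50257 is an assumed GPT-2-style
# vocabulary size, not a value from this commit.
tokens = np.random.randint(0, 50257, size=(1, 16), dtype=np.int64)

logits = session.run([output_name], {input_name: tokens})[0]

print(logits.shape)  # expected to be (batch, sequence, vocabulary_size)
```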
requirements.txt CHANGED
@@ -1,12 +1,12 @@
  datasets==3.0.2
+ tiktoken==0.8.0
  numpy==1.26.4
  torch==2.5.1
  torchmetrics==1.5.1
- tiktoken==0.8.0
  tqdm==4.66.6
  matplotlib==3.9.2
+ tensorboard==2.18.0
  safetensors==0.5.2
  onnx==1.17.0
  onnxscript==0.1.0.dev20250108
  onnxruntime==1.20.1
- tensorboard==2.18.0