Andrew DalPino
committed on
Commit
·
98369b4
1
Parent(s):
8d59dc3
Export LightGPT small at 150 epochs
Browse files
- README.md +2 -2
- export_model.ipynb +16 -19
- exports/lightgpt-small.onnx +3 -0
- exports/lightgpt-small.safetensors +3 -0
- requirements.txt +2 -2
README.md
CHANGED
@@ -9,11 +9,11 @@ metrics:
|
|
9 |
- perplexity
|
10 |
pipeline_tag: text-generation
|
11 |
tags:
|
12 |
-
-
|
13 |
---
|
14 |
# LightGPT
|
15 |
|
16 |
-
LightGPT is a lightweight generative pretrained Transformer (GPT) model for the people! Built using PyTorch and trained on the Fineweb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
|
17 |
|
18 |
## Features
|
19 |
|
|
|
9 |
- perplexity
|
10 |
pipeline_tag: text-generation
|
11 |
tags:
|
12 |
+
- NoPE
|
13 |
---
|
14 |
# LightGPT
|
15 |
|
16 |
+
LightGPT is a lightweight generative pretrained Transformer (GPT) language model for the people! Built using PyTorch and trained on the Fineweb and Alpaca datasets, LightGPT can answer questions, follow instructions, summarize documents, chat, and more. Best of all, the model weights *and* code are fully open-source for you to customize, improve upon, and share with the world.
|
17 |
|
18 |
## Features
|
19 |
|
export_model.ipynb
CHANGED
@@ -9,7 +9,7 @@
|
|
9 |
},
|
10 |
{
|
11 |
"cell_type": "code",
|
12 |
-
"execution_count":
|
13 |
"metadata": {},
|
14 |
"outputs": [],
|
15 |
"source": [
|
@@ -28,7 +28,7 @@
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
-
"execution_count":
|
32 |
"metadata": {},
|
33 |
"outputs": [
|
34 |
{
|
@@ -64,7 +64,7 @@
|
|
64 |
},
|
65 |
{
|
66 |
"cell_type": "code",
|
67 |
-
"execution_count":
|
68 |
"metadata": {},
|
69 |
"outputs": [],
|
70 |
"source": [
|
@@ -93,7 +93,7 @@
|
|
93 |
},
|
94 |
{
|
95 |
"cell_type": "code",
|
96 |
-
"execution_count":
|
97 |
"metadata": {},
|
98 |
"outputs": [
|
99 |
{
|
@@ -125,23 +125,20 @@
|
|
125 |
},
|
126 |
{
|
127 |
"cell_type": "code",
|
128 |
-
"execution_count":
|
129 |
"metadata": {},
|
130 |
"outputs": [
|
131 |
{
|
132 |
-
"
|
133 |
-
"
|
134 |
-
"
|
135 |
-
|
136 |
-
"
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
"text": [
|
143 |
-
"Applied 72 of general pattern rewrite rules.\n",
|
144 |
-
"Model saved to ./exports/lightgpt-small.onnx\n"
|
145 |
]
|
146 |
}
|
147 |
],
|
@@ -150,7 +147,7 @@
|
|
150 |
"\n",
|
151 |
"from torch.onnx import dynamo_export, ExportOptions\n",
|
152 |
"\n",
|
153 |
-
"example_input = torch.randint(0, model.vocabulary_size - 1, (1,
|
154 |
"\n",
|
155 |
"model = ONNXModel(model) # Nicer inferencing API\n",
|
156 |
"\n",
|
|
|
9 |
},
|
10 |
{
|
11 |
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
"metadata": {},
|
14 |
"outputs": [],
|
15 |
"source": [
|
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
+
"execution_count": 2,
|
32 |
"metadata": {},
|
33 |
"outputs": [
|
34 |
{
|
|
|
64 |
},
|
65 |
{
|
66 |
"cell_type": "code",
|
67 |
+
"execution_count": 3,
|
68 |
"metadata": {},
|
69 |
"outputs": [],
|
70 |
"source": [
|
|
|
93 |
},
|
94 |
{
|
95 |
"cell_type": "code",
|
96 |
+
"execution_count": 4,
|
97 |
"metadata": {},
|
98 |
"outputs": [
|
99 |
{
|
|
|
125 |
},
|
126 |
{
|
127 |
"cell_type": "code",
|
128 |
+
"execution_count": null,
|
129 |
"metadata": {},
|
130 |
"outputs": [
|
131 |
{
|
132 |
+
"ename": "AttributeError",
|
133 |
+
"evalue": "'GPT' object has no attribute 'block_size'",
|
134 |
+
"output_type": "error",
|
135 |
+
"traceback": [
|
136 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
137 |
+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
138 |
+
"Cell \u001b[0;32mIn[5], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ONNXModel\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01monnx\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dynamo_export, ExportOptions\n\u001b[0;32m----> 5\u001b[0m example_input \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m0\u001b[39m, model\u001b[38;5;241m.\u001b[39mvocabulary_size \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m, (\u001b[38;5;241m1\u001b[39m, \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_size\u001b[49m))\n\u001b[1;32m 7\u001b[0m model \u001b[38;5;241m=\u001b[39m ONNXModel(model) \u001b[38;5;66;03m# Nicer inferencing API\u001b[39;00m\n\u001b[1;32m 9\u001b[0m model\u001b[38;5;241m.\u001b[39meval() \u001b[38;5;66;03m# Turn off dropout and other train-time operations\u001b[39;00m\n",
|
139 |
+
"File \u001b[0;32m~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:220\u001b[0m, in \u001b[0;36mOptimizedModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_orig_mod\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_modules[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_orig_mod\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 220\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_orig_mod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
|
140 |
+
"File \u001b[0;32m~/Workspace/LightGPT/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1931\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m 1930\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1931\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[1;32m 1932\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1933\u001b[0m )\n",
|
141 |
+
"\u001b[0;31mAttributeError\u001b[0m: 'GPT' object has no attribute 'block_size'"
|
|
|
|
|
|
|
142 |
]
|
143 |
}
|
144 |
],
|
|
|
147 |
"\n",
|
148 |
"from torch.onnx import dynamo_export, ExportOptions\n",
|
149 |
"\n",
|
150 |
+
"example_input = torch.randint(0, model.vocabulary_size - 1, (1, 1024))\n",
|
151 |
"\n",
|
152 |
"model = ONNXModel(model) # Nicer inferencing API\n",
|
153 |
"\n",
|
exports/lightgpt-small.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f3c5af0c48df3f5af5f7eeccd5fc25b085f74ead592ff8b1af33b76246d9792
|
3 |
+
size 1414536976
|
exports/lightgpt-small.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd9f6b0a2468fc8f28e7e95a0e57476cd24e36f78352df65bd1b00108d4e5265
|
3 |
+
size 1414029160
|
requirements.txt
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
datasets==3.0.2
|
|
|
2 |
numpy==1.26.4
|
3 |
torch==2.5.1
|
4 |
torchmetrics==1.5.1
|
5 |
-
tiktoken==0.8.0
|
6 |
tqdm==4.66.6
|
7 |
matplotlib==3.9.2
|
|
|
8 |
safetensors==0.5.2
|
9 |
onnx==1.17.0
|
10 |
onnxscript==0.1.0.dev20250108
|
11 |
onnxruntime==1.20.1
|
12 |
-
tensorboard==2.18.0
|
|
|
1 |
datasets==3.0.2
|
2 |
+
tiktoken==0.8.0
|
3 |
numpy==1.26.4
|
4 |
torch==2.5.1
|
5 |
torchmetrics==1.5.1
|
|
|
6 |
tqdm==4.66.6
|
7 |
matplotlib==3.9.2
|
8 |
+
tensorboard==2.18.0
|
9 |
safetensors==0.5.2
|
10 |
onnx==1.17.0
|
11 |
onnxscript==0.1.0.dev20250108
|
12 |
onnxruntime==1.20.1
|
|