OpenOpenAI commited on
Commit
c6141aa
1 Parent(s): e9d7998

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 20,
6
+ "id": "88d5b164-4be8-4cce-b271-75c6de9e662a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "PhiModel(\n",
13
+ " (embed_tokens): Embedding(51200, 2048)\n",
14
+ " (embed_dropout): Dropout(p=0.0, inplace=False)\n",
15
+ " (layers): ModuleList(\n",
16
+ " (0-23): 24 x PhiDecoderLayer(\n",
17
+ " (self_attn): PhiAttention(\n",
18
+ " (q_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
19
+ " (k_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
20
+ " (v_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
21
+ " (dense): Linear(in_features=2048, out_features=2048, bias=True)\n",
22
+ " (rotary_emb): PhiRotaryEmbedding()\n",
23
+ " )\n",
24
+ " (mlp): PhiMLP(\n",
25
+ " (activation_fn): NewGELUActivation()\n",
26
+ " (fc1): Linear(in_features=2048, out_features=8192, bias=True)\n",
27
+ " (fc2): Linear(in_features=8192, out_features=2048, bias=True)\n",
28
+ " )\n",
29
+ " (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
30
+ " (resid_dropout): Dropout(p=0.0, inplace=False)\n",
31
+ " )\n",
32
+ " )\n",
33
+ " (final_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
34
+ ")"
35
+ ]
36
+ },
37
+ "execution_count": 20,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "from transformers import AutoModel, AutoTokenizer, AutoConfig\n",
44
+ "\n",
45
+ "# Specify the path to your checkpoint directory\n",
46
+ "checkpoint_dir = \"/workspace/OpenRLHF3/checkpoint/llama-3-8b-rlhf/iter_14\"\n",
47
+ "\n",
48
+ "# Load the model configuration\n",
49
+ "config = AutoConfig.from_pretrained(checkpoint_dir)\n",
50
+ "\n",
51
+ "# Load the tokenizer\n",
52
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)\n",
53
+ "\n",
54
+ "# Load the model using the safetensors files\n",
55
+ "model = AutoModel.from_pretrained(checkpoint_dir)\n",
56
+ "\n",
57
+ "# Set the model to evaluation mode\n",
58
+ "model.eval()\n",
59
+ "\n",
60
+ "# Now the model is ready to use for inference"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 11,
66
+ "id": "3f4721b1-0853-47a0-88b4-7720a0ef79ee",
67
+ "metadata": {},
68
+ "outputs": [
69
+ {
70
+ "data": {
71
+ "application/vnd.jupyter.widget-view+json": {
72
+ "model_id": "3862f17dcf0d46998871e14b6324e47f",
73
+ "version_major": 2,
74
+ "version_minor": 0
75
+ },
76
+ "text/plain": [
77
+ "model-00001-of-00002.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
78
+ ]
79
+ },
80
+ "metadata": {},
81
+ "output_type": "display_data"
82
+ },
83
+ {
84
+ "data": {
85
+ "application/vnd.jupyter.widget-view+json": {
86
+ "model_id": "846214a9ecc446128aac3e409588581e",
87
+ "version_major": 2,
88
+ "version_minor": 0
89
+ },
90
+ "text/plain": [
91
+ "model-00002-of-00002.safetensors: 0%| | 0.00/269M [00:00<?, ?B/s]"
92
+ ]
93
+ },
94
+ "metadata": {},
95
+ "output_type": "display_data"
96
+ },
97
+ {
98
+ "data": {
99
+ "application/vnd.jupyter.widget-view+json": {
100
+ "model_id": "66f069e0e7bc4d5391170a352c323372",
101
+ "version_major": 2,
102
+ "version_minor": 0
103
+ },
104
+ "text/plain": [
105
+ "Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
106
+ ]
107
+ },
108
+ "metadata": {},
109
+ "output_type": "display_data"
110
+ },
111
+ {
112
+ "data": {
113
+ "text/plain": [
114
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/ppo_iter_14/commit/f131fe145e8337eb720d51d26379a261d7cc38df', commit_message='Upload model', commit_description='', oid='f131fe145e8337eb720d51d26379a261d7cc38df', pr_url=None, pr_revision=None, pr_num=None)"
115
+ ]
116
+ },
117
+ "execution_count": 11,
118
+ "metadata": {},
119
+ "output_type": "execute_result"
120
+ }
121
+ ],
122
+ "source": [
123
+ "model.push_to_hub('OpenOpenAI/ppo_iter_14')"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 21,
129
+ "id": "6b467741-9810-4495-a968-64bdeb8d55cd",
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "data": {
134
+ "text/plain": [
135
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/ppo_iter_14/commit/3f245ca9967e3784192045bd2064da5801d6e171', commit_message='Upload config', commit_description='', oid='3f245ca9967e3784192045bd2064da5801d6e171', pr_url=None, pr_revision=None, pr_num=None)"
136
+ ]
137
+ },
138
+ "execution_count": 21,
139
+ "metadata": {},
140
+ "output_type": "execute_result"
141
+ }
142
+ ],
143
+ "source": [
144
+ "config.push_to_hub('OpenOpenAI/ppo_iter_14')"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 17,
150
+ "id": "37bb8dbb-f2cd-46cd-bdcd-906b0ae6e74e",
151
+ "metadata": {},
152
+ "outputs": [
153
+ {
154
+ "data": {
155
+ "application/vnd.jupyter.widget-view+json": {
156
+ "model_id": "c2857b5b6b824996a40de218bbab77f2",
157
+ "version_major": 2,
158
+ "version_minor": 0
159
+ },
160
+ "text/plain": [
161
+ "README.md: 0%| | 0.00/5.17k [00:00<?, ?B/s]"
162
+ ]
163
+ },
164
+ "metadata": {},
165
+ "output_type": "display_data"
166
+ },
167
+ {
168
+ "data": {
169
+ "text/plain": [
170
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/checkpoint-2500/commit/77f7dc75043b34bc2e8b677cf9bc9440a4b56937', commit_message='Upload tokenizer', commit_description='', oid='77f7dc75043b34bc2e8b677cf9bc9440a4b56937', pr_url=None, pr_revision=None, pr_num=None)"
171
+ ]
172
+ },
173
+ "execution_count": 17,
174
+ "metadata": {},
175
+ "output_type": "execute_result"
176
+ }
177
+ ],
178
+ "source": [
179
+ "tokenizer.push_to_hub('OpenOpenAI/checkpoint-2500')"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 19,
185
+ "id": "62eedc73-6524-4cdb-933a-702e1812b9bb",
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "data": {
190
+ "text/plain": [
191
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/checkpoint-2500/commit/ccdb68475a6b305183c27d9772fd4af0949f78b5', commit_message='Upload config', commit_description='', oid='ccdb68475a6b305183c27d9772fd4af0949f78b5', pr_url=None, pr_revision=None, pr_num=None)"
192
+ ]
193
+ },
194
+ "execution_count": 19,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "config.push_to_hub('OpenOpenAI/checkpoint-2500')"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 18,
206
+ "id": "acc6a23e-c7c7-404e-a51f-e5b7a0ad0df9",
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "data": {
211
+ "application/vnd.jupyter.widget-view+json": {
212
+ "model_id": "d04b28ff3e8246dc803a95bb4f15d3c2",
213
+ "version_major": 2,
214
+ "version_minor": 0
215
+ },
216
+ "text/plain": [
217
+ "tokenizer_config.json: 0%| | 0.00/7.51k [00:00<?, ?B/s]"
218
+ ]
219
+ },
220
+ "metadata": {},
221
+ "output_type": "display_data"
222
+ },
223
+ {
224
+ "data": {
225
+ "application/vnd.jupyter.widget-view+json": {
226
+ "model_id": "29dd2c2cf11140d5b967ecfc19fe2c68",
227
+ "version_major": 2,
228
+ "version_minor": 0
229
+ },
230
+ "text/plain": [
231
+ "vocab.json: 0%| | 0.00/798k [00:00<?, ?B/s]"
232
+ ]
233
+ },
234
+ "metadata": {},
235
+ "output_type": "display_data"
236
+ },
237
+ {
238
+ "data": {
239
+ "application/vnd.jupyter.widget-view+json": {
240
+ "model_id": "14a2f9cd8d0e4c7faef69ebe828fb142",
241
+ "version_major": 2,
242
+ "version_minor": 0
243
+ },
244
+ "text/plain": [
245
+ "merges.txt: 0%| | 0.00/456k [00:00<?, ?B/s]"
246
+ ]
247
+ },
248
+ "metadata": {},
249
+ "output_type": "display_data"
250
+ },
251
+ {
252
+ "data": {
253
+ "application/vnd.jupyter.widget-view+json": {
254
+ "model_id": "fd232cd0227c48fa8923669b8afa40b8",
255
+ "version_major": 2,
256
+ "version_minor": 0
257
+ },
258
+ "text/plain": [
259
+ "tokenizer.json: 0%| | 0.00/2.12M [00:00<?, ?B/s]"
260
+ ]
261
+ },
262
+ "metadata": {},
263
+ "output_type": "display_data"
264
+ },
265
+ {
266
+ "data": {
267
+ "application/vnd.jupyter.widget-view+json": {
268
+ "model_id": "62b052078fca48e2ae4a3b928671c3ff",
269
+ "version_major": 2,
270
+ "version_minor": 0
271
+ },
272
+ "text/plain": [
273
+ "added_tokens.json: 0%| | 0.00/1.08k [00:00<?, ?B/s]"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ },
279
+ {
280
+ "data": {
281
+ "application/vnd.jupyter.widget-view+json": {
282
+ "model_id": "e49797476ec14ef99a1161a673030aa4",
283
+ "version_major": 2,
284
+ "version_minor": 0
285
+ },
286
+ "text/plain": [
287
+ "special_tokens_map.json: 0%| | 0.00/587 [00:00<?, ?B/s]"
288
+ ]
289
+ },
290
+ "metadata": {},
291
+ "output_type": "display_data"
292
+ },
293
+ {
294
+ "data": {
295
+ "application/vnd.jupyter.widget-view+json": {
296
+ "model_id": "1a52317f238f496892f43e940b0f88f5",
297
+ "version_major": 2,
298
+ "version_minor": 0
299
+ },
300
+ "text/plain": [
301
+ "model.safetensors.index.json: 0%| | 0.00/24.7k [00:00<?, ?B/s]"
302
+ ]
303
+ },
304
+ "metadata": {},
305
+ "output_type": "display_data"
306
+ },
307
+ {
308
+ "data": {
309
+ "application/vnd.jupyter.widget-view+json": {
310
+ "model_id": "756caaf635bf477e815b198d3c1a4bae",
311
+ "version_major": 2,
312
+ "version_minor": 0
313
+ },
314
+ "text/plain": [
315
+ "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
316
+ ]
317
+ },
318
+ "metadata": {},
319
+ "output_type": "display_data"
320
+ },
321
+ {
322
+ "data": {
323
+ "application/vnd.jupyter.widget-view+json": {
324
+ "model_id": "20ee3df9a2794587acfa73cadb03a39f",
325
+ "version_major": 2,
326
+ "version_minor": 0
327
+ },
328
+ "text/plain": [
329
+ "model-00001-of-00002.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
330
+ ]
331
+ },
332
+ "metadata": {},
333
+ "output_type": "display_data"
334
+ },
335
+ {
336
+ "ename": "KeyboardInterrupt",
337
+ "evalue": "",
338
+ "output_type": "error",
339
+ "traceback": [
340
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
341
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
342
+ "Cell \u001b[0;32mIn[18], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModel\n\u001b[1;32m 4\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenOpenAI/checkpoint-2500\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mOpenOpenAI/checkpoint-2500\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
343
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n",
344
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:3658\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3655\u001b[0m \u001b[38;5;66;03m# We'll need to download and cache each checkpoint shard if the checkpoint is sharded.\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_sharded:\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;66;03m# resolved_archive_file becomes a list of files that point to the different checkpoint shards in this case.\u001b[39;00m\n\u001b[0;32m-> 3658\u001b[0m resolved_archive_file, sharded_metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_checkpoint_shard_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3659\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3660\u001b[0m \u001b[43m \u001b[49m\u001b[43mresolved_archive_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3661\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3662\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3663\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3664\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3665\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3666\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3667\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3668\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3669\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3670\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3671\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3673\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 3674\u001b[0m is_safetensors_available()\n\u001b[1;32m 3675\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resolved_archive_file, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m 3676\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m resolved_archive_file\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.safetensors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3677\u001b[0m ):\n\u001b[1;32m 3678\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m safe_open(resolved_archive_file, framework\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n",
345
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:1079\u001b[0m, in \u001b[0;36mget_checkpoint_shard_files\u001b[0;34m(pretrained_model_name_or_path, index_filename, cache_dir, force_download, proxies, resume_download, local_files_only, token, user_agent, revision, subfolder, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 1076\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m shard_filename \u001b[38;5;129;01min\u001b[39;00m tqdm(shard_filenames, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading shards\u001b[39m\u001b[38;5;124m\"\u001b[39m, disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m show_progress_bar):\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;66;03m# Load from URL\u001b[39;00m\n\u001b[0;32m-> 1079\u001b[0m cached_filename \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1080\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1081\u001b[0m \u001b[43m \u001b[49m\u001b[43mshard_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1082\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1083\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1084\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1085\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1086\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1087\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1088\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1089\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1090\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_commit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;66;03m# We have already dealt with RepositoryNotFoundError and RevisionNotFoundError when getting the index, so\u001b[39;00m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;66;03m# we don't have to catch them here.\u001b[39;00m\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError:\n",
346
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:402\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 399\u001b[0m user_agent \u001b[38;5;241m=\u001b[39m http_user_agent(user_agent)\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 401\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 402\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 403\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 405\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 417\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n",
347
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:101\u001b[0m, in \u001b[0;36m_deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m message \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m custom_message\n\u001b[1;32m 100\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
348
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
349
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1240\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_local_dir(\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[1;32m 1222\u001b[0m local_dir\u001b[38;5;241m=\u001b[39mlocal_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[1;32m 1238\u001b[0m )\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1241\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m 1242\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1243\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m 1244\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1246\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1247\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1248\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m 1249\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1250\u001b[0m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1251\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1252\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1253\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1254\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m 1255\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1256\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1257\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
350
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1389\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m 1387\u001b[0m Path(lock_path)\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1388\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m WeakFileLock(lock_path):\n\u001b[0;32m-> 1389\u001b[0m \u001b[43m_download_to_tmp_and_move\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1390\u001b[0m \u001b[43m \u001b[49m\u001b[43mincomplete_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.incomplete\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1391\u001b[0m \u001b[43m \u001b[49m\u001b[43mdestination_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1392\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1393\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1394\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1395\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1397\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1398\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1399\u001b[0m _create_symlink(blob_path, pointer_path, new_blob\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1401\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pointer_path\n",
351
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1915\u001b[0m, in \u001b[0;36m_download_to_tmp_and_move\u001b[0;34m(incomplete_path, destination_path, url_to_download, proxies, headers, expected_size, filename, force_download)\u001b[0m\n\u001b[1;32m 1912\u001b[0m _check_disk_space(expected_size, incomplete_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[1;32m 1913\u001b[0m _check_disk_space(expected_size, destination_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[0;32m-> 1915\u001b[0m \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1916\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1917\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1918\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1919\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1920\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1921\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1922\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1924\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownload complete. Moving file to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdestination_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1925\u001b[0m _chmod_and_move(incomplete_path, destination_path)\n",
352
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:549\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, expected_size, displayed_filename, _nb_retries, _tqdm_bar)\u001b[0m\n\u001b[1;32m 547\u001b[0m new_resume_size \u001b[38;5;241m=\u001b[39m resume_size\n\u001b[1;32m 548\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 549\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m r\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39mDOWNLOAD_CHUNK_SIZE):\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk: \u001b[38;5;66;03m# filter out keep-alive new chunks\u001b[39;00m\n\u001b[1;32m 551\u001b[0m progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n",
353
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 819\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
354
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:628\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp):\n\u001b[0;32m--> 628\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n",
355
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:567\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 564\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 567\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
356
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:533\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 532\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 533\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
357
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
358
+ "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
359
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
360
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
361
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
362
+ ]
363
+ }
364
+ ],
365
+ "source": [
366
+ "# Load model directly\n",
367
+ "from transformers import AutoTokenizer, AutoModel\n",
368
+ "\n",
369
+ "tokenizer = AutoTokenizer.from_pretrained(\"OpenOpenAI/checkpoint-2500\")\n",
370
+ "model = AutoModel.from_pretrained(\"OpenOpenAI/checkpoint-2500\")"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "id": "d3443f20-cd0e-4fa0-97ab-a69f73c72be6",
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": []
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "id": "65b919ca-4d3c-49bf-a0bc-4240defb84d1",
385
+ "metadata": {},
386
+ "outputs": [],
387
+ "source": []
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": null,
392
+ "id": "5000a1a2-a562-403e-af50-9477a3ecbcbb",
393
+ "metadata": {},
394
+ "outputs": [
395
+ {
396
+ "data": {
397
+ "application/vnd.jupyter.widget-view+json": {
398
+ "model_id": "ce2c8a458e2f48209383c557f58f1e5d",
399
+ "version_major": 2,
400
+ "version_minor": 0
401
+ },
402
+ "text/plain": [
403
+ "config.json: 0%| | 0.00/743 [00:00<?, ?B/s]"
404
+ ]
405
+ },
406
+ "metadata": {},
407
+ "output_type": "display_data"
408
+ },
409
+ {
410
+ "data": {
411
+ "application/vnd.jupyter.widget-view+json": {
412
+ "model_id": "eae84f82c9304d9f8af556958830f436",
413
+ "version_major": 2,
414
+ "version_minor": 0
415
+ },
416
+ "text/plain": [
417
+ "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
418
+ ]
419
+ },
420
+ "metadata": {},
421
+ "output_type": "display_data"
422
+ },
423
+ {
424
+ "data": {
425
+ "application/vnd.jupyter.widget-view+json": {
426
+ "model_id": "7b95d784fc494882852373c34cbd4019",
427
+ "version_major": 2,
428
+ "version_minor": 0
429
+ },
430
+ "text/plain": [
431
+ "model-00001-of-00002.safetensors: 5%|4 | 241M/4.98G [00:00<?, ?B/s]"
432
+ ]
433
+ },
434
+ "metadata": {},
435
+ "output_type": "display_data"
436
+ }
437
+ ],
438
+ "source": [
439
+ "# Load model directly\n",
440
+ "from transformers import AutoTokenizer, AutoModel\n",
441
+ "\n",
442
+ "tokenizer = AutoTokenizer.from_pretrained(\"OpenOpenAI/checkpoint-2500\")\n",
443
+ "model = AutoModel.from_pretrained(\"OpenOpenAI/checkpoint-2500\")"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "id": "0f8a691a-15be-4ce2-9ea1-8758471f5824",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": []
453
+ },
454
+ {
455
+ "cell_type": "code",
456
+ "execution_count": 13,
457
+ "id": "9da6361a-e291-4a91-a942-676f84292c43",
458
+ "metadata": {},
459
+ "outputs": [
460
+ {
461
+ "ename": "OSError",
462
+ "evalue": "Can't load tokenizer for 'OpenOpenAI/checkpoint-2500'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'OpenOpenAI/checkpoint-2500' is the correct path to a directory containing all relevant files for a CodeGenTokenizerFast tokenizer.",
463
+ "output_type": "error",
464
+ "traceback": [
465
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
466
+ "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
467
+ "Cell \u001b[0;32mIn[13], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModel\n\u001b[1;32m 3\u001b[0m model_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenOpenAI/checkpoint-2500\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Replace with the correct path\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModel\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_name)\n",
468
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:915\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 912\u001b[0m tokenizer_class_py, tokenizer_class_fast \u001b[38;5;241m=\u001b[39m TOKENIZER_MAPPING[\u001b[38;5;28mtype\u001b[39m(config)]\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_fast \u001b[38;5;129;01mand\u001b[39;00m (use_fast \u001b[38;5;129;01mor\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 915\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtokenizer_class_fast\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 917\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
469
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2275\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 2272\u001b[0m \u001b[38;5;66;03m# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be\u001b[39;00m\n\u001b[1;32m 2273\u001b[0m \u001b[38;5;66;03m# loaded directly from the GGUF file.\u001b[39;00m\n\u001b[1;32m 2274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gguf_file:\n\u001b[0;32m-> 2275\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 2276\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2277\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2278\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2279\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2280\u001b[0m )\n\u001b[1;32m 2282\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 2283\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n",
470
+ "\u001b[0;31mOSError\u001b[0m: Can't load tokenizer for 'OpenOpenAI/checkpoint-2500'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'OpenOpenAI/checkpoint-2500' is the correct path to a directory containing all relevant files for a CodeGenTokenizerFast tokenizer."
471
+ ]
472
+ }
473
+ ],
474
+ "source": [
475
+ "from transformers import AutoTokenizer, AutoModel\n",
476
+ "\n",
477
+ "model_name = \"OpenOpenAI/checkpoint-2500\" # Replace with the correct path\n",
478
+ "\n",
479
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
480
+ "model = AutoModel.from_pretrained(model_name)"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": null,
486
+ "id": "0da6e77a-bfb1-4e24-af67-1b76009f7289",
487
+ "metadata": {},
488
+ "outputs": [],
489
+ "source": []
490
+ }
491
+ ],
492
+ "metadata": {
493
+ "kernelspec": {
494
+ "display_name": "Python 3 (ipykernel)",
495
+ "language": "python",
496
+ "name": "python3"
497
+ },
498
+ "language_info": {
499
+ "codemirror_mode": {
500
+ "name": "ipython",
501
+ "version": 3
502
+ },
503
+ "file_extension": ".py",
504
+ "mimetype": "text/x-python",
505
+ "name": "python",
506
+ "nbconvert_exporter": "python",
507
+ "pygments_lexer": "ipython3",
508
+ "version": "3.10.12"
509
+ }
510
+ },
511
+ "nbformat": 4,
512
+ "nbformat_minor": 5
513
+ }
.ipynb_checkpoints/added_tokens-checkpoint.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50294,
3
+ "\t\t\t": 50293,
4
+ "\t\t\t\t": 50292,
5
+ "\t\t\t\t\t": 50291,
6
+ "\t\t\t\t\t\t": 50290,
7
+ "\t\t\t\t\t\t\t": 50289,
8
+ "\t\t\t\t\t\t\t\t": 50288,
9
+ "\t\t\t\t\t\t\t\t\t": 50287,
10
+ " ": 50286,
11
+ " ": 50285,
12
+ " ": 50284,
13
+ " ": 50283,
14
+ " ": 50282,
15
+ " ": 50281,
16
+ " ": 50280,
17
+ " ": 50279,
18
+ " ": 50278,
19
+ " ": 50277,
20
+ " ": 50276,
21
+ " ": 50275,
22
+ " ": 50274,
23
+ " ": 50273,
24
+ " ": 50272,
25
+ " ": 50271,
26
+ " ": 50270,
27
+ " ": 50269,
28
+ " ": 50268,
29
+ " ": 50267,
30
+ " ": 50266,
31
+ " ": 50265,
32
+ " ": 50264,
33
+ " ": 50263,
34
+ " ": 50262,
35
+ " ": 50261,
36
+ " ": 50260,
37
+ " ": 50259,
38
+ " ": 50258,
39
+ " ": 50257
40
+ }
Untitled.ipynb ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 20,
6
+ "id": "88d5b164-4be8-4cce-b271-75c6de9e662a",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "PhiModel(\n",
13
+ " (embed_tokens): Embedding(51200, 2048)\n",
14
+ " (embed_dropout): Dropout(p=0.0, inplace=False)\n",
15
+ " (layers): ModuleList(\n",
16
+ " (0-23): 24 x PhiDecoderLayer(\n",
17
+ " (self_attn): PhiAttention(\n",
18
+ " (q_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
19
+ " (k_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
20
+ " (v_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
21
+ " (dense): Linear(in_features=2048, out_features=2048, bias=True)\n",
22
+ " (rotary_emb): PhiRotaryEmbedding()\n",
23
+ " )\n",
24
+ " (mlp): PhiMLP(\n",
25
+ " (activation_fn): NewGELUActivation()\n",
26
+ " (fc1): Linear(in_features=2048, out_features=8192, bias=True)\n",
27
+ " (fc2): Linear(in_features=8192, out_features=2048, bias=True)\n",
28
+ " )\n",
29
+ " (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
30
+ " (resid_dropout): Dropout(p=0.0, inplace=False)\n",
31
+ " )\n",
32
+ " )\n",
33
+ " (final_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
34
+ ")"
35
+ ]
36
+ },
37
+ "execution_count": 20,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "from transformers import AutoModel, AutoTokenizer, AutoConfig\n",
44
+ "\n",
45
+ "# Specify the path to your checkpoint directory\n",
46
+ "checkpoint_dir = \"/workspace/OpenRLHF3/checkpoint/llama-3-8b-rlhf/iter_14\"\n",
47
+ "\n",
48
+ "# Load the model configuration\n",
49
+ "config = AutoConfig.from_pretrained(checkpoint_dir)\n",
50
+ "\n",
51
+ "# Load the tokenizer\n",
52
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)\n",
53
+ "\n",
54
+ "# Load the model using the safetensors files\n",
55
+ "model = AutoModel.from_pretrained(checkpoint_dir)\n",
56
+ "\n",
57
+ "# Set the model to evaluation mode\n",
58
+ "model.eval()\n",
59
+ "\n",
60
+ "# Now the model is ready to use for inference"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 11,
66
+ "id": "3f4721b1-0853-47a0-88b4-7720a0ef79ee",
67
+ "metadata": {},
68
+ "outputs": [
69
+ {
70
+ "data": {
71
+ "application/vnd.jupyter.widget-view+json": {
72
+ "model_id": "3862f17dcf0d46998871e14b6324e47f",
73
+ "version_major": 2,
74
+ "version_minor": 0
75
+ },
76
+ "text/plain": [
77
+ "model-00001-of-00002.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
78
+ ]
79
+ },
80
+ "metadata": {},
81
+ "output_type": "display_data"
82
+ },
83
+ {
84
+ "data": {
85
+ "application/vnd.jupyter.widget-view+json": {
86
+ "model_id": "846214a9ecc446128aac3e409588581e",
87
+ "version_major": 2,
88
+ "version_minor": 0
89
+ },
90
+ "text/plain": [
91
+ "model-00002-of-00002.safetensors: 0%| | 0.00/269M [00:00<?, ?B/s]"
92
+ ]
93
+ },
94
+ "metadata": {},
95
+ "output_type": "display_data"
96
+ },
97
+ {
98
+ "data": {
99
+ "application/vnd.jupyter.widget-view+json": {
100
+ "model_id": "66f069e0e7bc4d5391170a352c323372",
101
+ "version_major": 2,
102
+ "version_minor": 0
103
+ },
104
+ "text/plain": [
105
+ "Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
106
+ ]
107
+ },
108
+ "metadata": {},
109
+ "output_type": "display_data"
110
+ },
111
+ {
112
+ "data": {
113
+ "text/plain": [
114
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/ppo_iter_14/commit/f131fe145e8337eb720d51d26379a261d7cc38df', commit_message='Upload model', commit_description='', oid='f131fe145e8337eb720d51d26379a261d7cc38df', pr_url=None, pr_revision=None, pr_num=None)"
115
+ ]
116
+ },
117
+ "execution_count": 11,
118
+ "metadata": {},
119
+ "output_type": "execute_result"
120
+ }
121
+ ],
122
+ "source": [
123
+ "model.push_to_hub('OpenOpenAI/ppo_iter_14')"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 21,
129
+ "id": "6b467741-9810-4495-a968-64bdeb8d55cd",
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "data": {
134
+ "text/plain": [
135
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/ppo_iter_14/commit/3f245ca9967e3784192045bd2064da5801d6e171', commit_message='Upload config', commit_description='', oid='3f245ca9967e3784192045bd2064da5801d6e171', pr_url=None, pr_revision=None, pr_num=None)"
136
+ ]
137
+ },
138
+ "execution_count": 21,
139
+ "metadata": {},
140
+ "output_type": "execute_result"
141
+ }
142
+ ],
143
+ "source": [
144
+ "config.push_to_hub('OpenOpenAI/ppo_iter_14')"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 17,
150
+ "id": "37bb8dbb-f2cd-46cd-bdcd-906b0ae6e74e",
151
+ "metadata": {},
152
+ "outputs": [
153
+ {
154
+ "data": {
155
+ "application/vnd.jupyter.widget-view+json": {
156
+ "model_id": "c2857b5b6b824996a40de218bbab77f2",
157
+ "version_major": 2,
158
+ "version_minor": 0
159
+ },
160
+ "text/plain": [
161
+ "README.md: 0%| | 0.00/5.17k [00:00<?, ?B/s]"
162
+ ]
163
+ },
164
+ "metadata": {},
165
+ "output_type": "display_data"
166
+ },
167
+ {
168
+ "data": {
169
+ "text/plain": [
170
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/checkpoint-2500/commit/77f7dc75043b34bc2e8b677cf9bc9440a4b56937', commit_message='Upload tokenizer', commit_description='', oid='77f7dc75043b34bc2e8b677cf9bc9440a4b56937', pr_url=None, pr_revision=None, pr_num=None)"
171
+ ]
172
+ },
173
+ "execution_count": 17,
174
+ "metadata": {},
175
+ "output_type": "execute_result"
176
+ }
177
+ ],
178
+ "source": [
179
+ "tokenizer.push_to_hub('OpenOpenAI/checkpoint-2500')"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 19,
185
+ "id": "62eedc73-6524-4cdb-933a-702e1812b9bb",
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "data": {
190
+ "text/plain": [
191
+ "CommitInfo(commit_url='https://huggingface.co/OpenOpenAI/checkpoint-2500/commit/ccdb68475a6b305183c27d9772fd4af0949f78b5', commit_message='Upload config', commit_description='', oid='ccdb68475a6b305183c27d9772fd4af0949f78b5', pr_url=None, pr_revision=None, pr_num=None)"
192
+ ]
193
+ },
194
+ "execution_count": 19,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "config.push_to_hub('OpenOpenAI/checkpoint-2500')"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 18,
206
+ "id": "acc6a23e-c7c7-404e-a51f-e5b7a0ad0df9",
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "data": {
211
+ "application/vnd.jupyter.widget-view+json": {
212
+ "model_id": "d04b28ff3e8246dc803a95bb4f15d3c2",
213
+ "version_major": 2,
214
+ "version_minor": 0
215
+ },
216
+ "text/plain": [
217
+ "tokenizer_config.json: 0%| | 0.00/7.51k [00:00<?, ?B/s]"
218
+ ]
219
+ },
220
+ "metadata": {},
221
+ "output_type": "display_data"
222
+ },
223
+ {
224
+ "data": {
225
+ "application/vnd.jupyter.widget-view+json": {
226
+ "model_id": "29dd2c2cf11140d5b967ecfc19fe2c68",
227
+ "version_major": 2,
228
+ "version_minor": 0
229
+ },
230
+ "text/plain": [
231
+ "vocab.json: 0%| | 0.00/798k [00:00<?, ?B/s]"
232
+ ]
233
+ },
234
+ "metadata": {},
235
+ "output_type": "display_data"
236
+ },
237
+ {
238
+ "data": {
239
+ "application/vnd.jupyter.widget-view+json": {
240
+ "model_id": "14a2f9cd8d0e4c7faef69ebe828fb142",
241
+ "version_major": 2,
242
+ "version_minor": 0
243
+ },
244
+ "text/plain": [
245
+ "merges.txt: 0%| | 0.00/456k [00:00<?, ?B/s]"
246
+ ]
247
+ },
248
+ "metadata": {},
249
+ "output_type": "display_data"
250
+ },
251
+ {
252
+ "data": {
253
+ "application/vnd.jupyter.widget-view+json": {
254
+ "model_id": "fd232cd0227c48fa8923669b8afa40b8",
255
+ "version_major": 2,
256
+ "version_minor": 0
257
+ },
258
+ "text/plain": [
259
+ "tokenizer.json: 0%| | 0.00/2.12M [00:00<?, ?B/s]"
260
+ ]
261
+ },
262
+ "metadata": {},
263
+ "output_type": "display_data"
264
+ },
265
+ {
266
+ "data": {
267
+ "application/vnd.jupyter.widget-view+json": {
268
+ "model_id": "62b052078fca48e2ae4a3b928671c3ff",
269
+ "version_major": 2,
270
+ "version_minor": 0
271
+ },
272
+ "text/plain": [
273
+ "added_tokens.json: 0%| | 0.00/1.08k [00:00<?, ?B/s]"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ },
279
+ {
280
+ "data": {
281
+ "application/vnd.jupyter.widget-view+json": {
282
+ "model_id": "e49797476ec14ef99a1161a673030aa4",
283
+ "version_major": 2,
284
+ "version_minor": 0
285
+ },
286
+ "text/plain": [
287
+ "special_tokens_map.json: 0%| | 0.00/587 [00:00<?, ?B/s]"
288
+ ]
289
+ },
290
+ "metadata": {},
291
+ "output_type": "display_data"
292
+ },
293
+ {
294
+ "data": {
295
+ "application/vnd.jupyter.widget-view+json": {
296
+ "model_id": "1a52317f238f496892f43e940b0f88f5",
297
+ "version_major": 2,
298
+ "version_minor": 0
299
+ },
300
+ "text/plain": [
301
+ "model.safetensors.index.json: 0%| | 0.00/24.7k [00:00<?, ?B/s]"
302
+ ]
303
+ },
304
+ "metadata": {},
305
+ "output_type": "display_data"
306
+ },
307
+ {
308
+ "data": {
309
+ "application/vnd.jupyter.widget-view+json": {
310
+ "model_id": "756caaf635bf477e815b198d3c1a4bae",
311
+ "version_major": 2,
312
+ "version_minor": 0
313
+ },
314
+ "text/plain": [
315
+ "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
316
+ ]
317
+ },
318
+ "metadata": {},
319
+ "output_type": "display_data"
320
+ },
321
+ {
322
+ "data": {
323
+ "application/vnd.jupyter.widget-view+json": {
324
+ "model_id": "20ee3df9a2794587acfa73cadb03a39f",
325
+ "version_major": 2,
326
+ "version_minor": 0
327
+ },
328
+ "text/plain": [
329
+ "model-00001-of-00002.safetensors: 0%| | 0.00/4.98G [00:00<?, ?B/s]"
330
+ ]
331
+ },
332
+ "metadata": {},
333
+ "output_type": "display_data"
334
+ },
335
+ {
336
+ "ename": "KeyboardInterrupt",
337
+ "evalue": "",
338
+ "output_type": "error",
339
+ "traceback": [
340
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
341
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
342
+ "Cell \u001b[0;32mIn[18], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModel\n\u001b[1;32m 4\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenOpenAI/checkpoint-2500\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mOpenOpenAI/checkpoint-2500\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
343
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n",
344
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:3658\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3655\u001b[0m \u001b[38;5;66;03m# We'll need to download and cache each checkpoint shard if the checkpoint is sharded.\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_sharded:\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;66;03m# resolved_archive_file becomes a list of files that point to the different checkpoint shards in this case.\u001b[39;00m\n\u001b[0;32m-> 3658\u001b[0m resolved_archive_file, sharded_metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_checkpoint_shard_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3659\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3660\u001b[0m \u001b[43m \u001b[49m\u001b[43mresolved_archive_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3661\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3662\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3663\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3664\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3665\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3666\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3667\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3668\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3669\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3670\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3671\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3673\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 3674\u001b[0m is_safetensors_available()\n\u001b[1;32m 3675\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resolved_archive_file, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m 3676\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m resolved_archive_file\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.safetensors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3677\u001b[0m ):\n\u001b[1;32m 3678\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m safe_open(resolved_archive_file, framework\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n",
345
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:1079\u001b[0m, in \u001b[0;36mget_checkpoint_shard_files\u001b[0;34m(pretrained_model_name_or_path, index_filename, cache_dir, force_download, proxies, resume_download, local_files_only, token, user_agent, revision, subfolder, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 1076\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m shard_filename \u001b[38;5;129;01min\u001b[39;00m tqdm(shard_filenames, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading shards\u001b[39m\u001b[38;5;124m\"\u001b[39m, disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m show_progress_bar):\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;66;03m# Load from URL\u001b[39;00m\n\u001b[0;32m-> 1079\u001b[0m cached_filename \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1080\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1081\u001b[0m \u001b[43m \u001b[49m\u001b[43mshard_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1082\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1083\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1084\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1085\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1086\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1087\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1088\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1089\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1090\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_commit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;66;03m# We have already dealt with RepositoryNotFoundError and RevisionNotFoundError when getting the index, so\u001b[39;00m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;66;03m# we don't have to catch them here.\u001b[39;00m\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError:\n",
346
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:402\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 399\u001b[0m user_agent \u001b[38;5;241m=\u001b[39m http_user_agent(user_agent)\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 401\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 402\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 403\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 405\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 417\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n",
347
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:101\u001b[0m, in \u001b[0;36m_deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m message \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m custom_message\n\u001b[1;32m 100\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
348
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
349
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1240\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_local_dir(\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[1;32m 1222\u001b[0m local_dir\u001b[38;5;241m=\u001b[39mlocal_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[1;32m 1238\u001b[0m )\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1241\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m 1242\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1243\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m 1244\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1246\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1247\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1248\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m 1249\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1250\u001b[0m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1251\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1252\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1253\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1254\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m 1255\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1256\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1257\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
350
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1389\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m 1387\u001b[0m Path(lock_path)\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1388\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m WeakFileLock(lock_path):\n\u001b[0;32m-> 1389\u001b[0m \u001b[43m_download_to_tmp_and_move\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1390\u001b[0m \u001b[43m \u001b[49m\u001b[43mincomplete_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.incomplete\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1391\u001b[0m \u001b[43m \u001b[49m\u001b[43mdestination_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1392\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1393\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1394\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1395\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1397\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1398\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1399\u001b[0m _create_symlink(blob_path, pointer_path, new_blob\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1401\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pointer_path\n",
351
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1915\u001b[0m, in \u001b[0;36m_download_to_tmp_and_move\u001b[0;34m(incomplete_path, destination_path, url_to_download, proxies, headers, expected_size, filename, force_download)\u001b[0m\n\u001b[1;32m 1912\u001b[0m _check_disk_space(expected_size, incomplete_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[1;32m 1913\u001b[0m _check_disk_space(expected_size, destination_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[0;32m-> 1915\u001b[0m \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1916\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1917\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1918\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1919\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1920\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1921\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1922\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1924\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownload complete. Moving file to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdestination_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1925\u001b[0m _chmod_and_move(incomplete_path, destination_path)\n",
352
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:549\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, expected_size, displayed_filename, _nb_retries, _tqdm_bar)\u001b[0m\n\u001b[1;32m 547\u001b[0m new_resume_size \u001b[38;5;241m=\u001b[39m resume_size\n\u001b[1;32m 548\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 549\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m r\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39mDOWNLOAD_CHUNK_SIZE):\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk: \u001b[38;5;66;03m# filter out keep-alive new chunks\u001b[39;00m\n\u001b[1;32m 551\u001b[0m progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n",
353
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 819\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
354
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:628\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp):\n\u001b[0;32m--> 628\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n",
355
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:567\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 564\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 567\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
356
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:533\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 532\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 533\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
357
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
358
+ "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
359
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
360
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
361
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
362
+ ]
363
+ }
364
+ ],
365
+ "source": [
366
+ "# Load model directly\n",
367
+ "from transformers import AutoTokenizer, AutoModel\n",
368
+ "\n",
369
+ "tokenizer = AutoTokenizer.from_pretrained(\"OpenOpenAI/checkpoint-2500\")\n",
370
+ "model = AutoModel.from_pretrained(\"OpenOpenAI/checkpoint-2500\")"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "id": "d3443f20-cd0e-4fa0-97ab-a69f73c72be6",
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": []
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "id": "65b919ca-4d3c-49bf-a0bc-4240defb84d1",
385
+ "metadata": {},
386
+ "outputs": [],
387
+ "source": []
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 22,
392
+ "id": "5000a1a2-a562-403e-af50-9477a3ecbcbb",
393
+ "metadata": {},
394
+ "outputs": [
395
+ {
396
+ "data": {
397
+ "application/vnd.jupyter.widget-view+json": {
398
+ "model_id": "ce2c8a458e2f48209383c557f58f1e5d",
399
+ "version_major": 2,
400
+ "version_minor": 0
401
+ },
402
+ "text/plain": [
403
+ "config.json: 0%| | 0.00/743 [00:00<?, ?B/s]"
404
+ ]
405
+ },
406
+ "metadata": {},
407
+ "output_type": "display_data"
408
+ },
409
+ {
410
+ "data": {
411
+ "application/vnd.jupyter.widget-view+json": {
412
+ "model_id": "eae84f82c9304d9f8af556958830f436",
413
+ "version_major": 2,
414
+ "version_minor": 0
415
+ },
416
+ "text/plain": [
417
+ "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
418
+ ]
419
+ },
420
+ "metadata": {},
421
+ "output_type": "display_data"
422
+ },
423
+ {
424
+ "data": {
425
+ "application/vnd.jupyter.widget-view+json": {
426
+ "model_id": "7b95d784fc494882852373c34cbd4019",
427
+ "version_major": 2,
428
+ "version_minor": 0
429
+ },
430
+ "text/plain": [
431
+ "model-00001-of-00002.safetensors: 5%|4 | 241M/4.98G [00:00<?, ?B/s]"
432
+ ]
433
+ },
434
+ "metadata": {},
435
+ "output_type": "display_data"
436
+ },
437
+ {
438
+ "ename": "KeyboardInterrupt",
439
+ "evalue": "",
440
+ "output_type": "error",
441
+ "traceback": [
442
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
443
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
444
+ "Cell \u001b[0;32mIn[22], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModel\n\u001b[1;32m 4\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenOpenAI/checkpoint-2500\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mOpenOpenAI/checkpoint-2500\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
445
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n",
446
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:3658\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3655\u001b[0m \u001b[38;5;66;03m# We'll need to download and cache each checkpoint shard if the checkpoint is sharded.\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_sharded:\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;66;03m# resolved_archive_file becomes a list of files that point to the different checkpoint shards in this case.\u001b[39;00m\n\u001b[0;32m-> 3658\u001b[0m resolved_archive_file, sharded_metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_checkpoint_shard_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3659\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3660\u001b[0m \u001b[43m \u001b[49m\u001b[43mresolved_archive_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3661\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3662\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3663\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3664\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3665\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3666\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3667\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3668\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3669\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3670\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3671\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3673\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 3674\u001b[0m is_safetensors_available()\n\u001b[1;32m 3675\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resolved_archive_file, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m 3676\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m resolved_archive_file\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.safetensors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3677\u001b[0m ):\n\u001b[1;32m 3678\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m safe_open(resolved_archive_file, framework\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n",
447
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:1079\u001b[0m, in \u001b[0;36mget_checkpoint_shard_files\u001b[0;34m(pretrained_model_name_or_path, index_filename, cache_dir, force_download, proxies, resume_download, local_files_only, token, user_agent, revision, subfolder, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 1076\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m shard_filename \u001b[38;5;129;01min\u001b[39;00m tqdm(shard_filenames, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading shards\u001b[39m\u001b[38;5;124m\"\u001b[39m, disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m show_progress_bar):\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;66;03m# Load from URL\u001b[39;00m\n\u001b[0;32m-> 1079\u001b[0m cached_filename \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1080\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1081\u001b[0m \u001b[43m \u001b[49m\u001b[43mshard_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1082\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1083\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1084\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1085\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1086\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1087\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1088\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1089\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1090\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_commit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;66;03m# We have already dealt with RepositoryNotFoundError and RevisionNotFoundError when getting the index, so\u001b[39;00m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;66;03m# we don't have to catch them here.\u001b[39;00m\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError:\n",
448
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:402\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 399\u001b[0m user_agent \u001b[38;5;241m=\u001b[39m http_user_agent(user_agent)\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 401\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 402\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 403\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 405\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 417\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n",
449
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:101\u001b[0m, in \u001b[0;36m_deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m message \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m custom_message\n\u001b[1;32m 100\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m)\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
450
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
451
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1240\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_local_dir(\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[1;32m 1222\u001b[0m local_dir\u001b[38;5;241m=\u001b[39mlocal_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[1;32m 1238\u001b[0m )\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1241\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m 1242\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1243\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m 1244\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1245\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1246\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1247\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1248\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m 1249\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1250\u001b[0m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1251\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1252\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1253\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1254\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m 1255\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1256\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1257\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
452
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1389\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m 1387\u001b[0m Path(lock_path)\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1388\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m WeakFileLock(lock_path):\n\u001b[0;32m-> 1389\u001b[0m \u001b[43m_download_to_tmp_and_move\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1390\u001b[0m \u001b[43m \u001b[49m\u001b[43mincomplete_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.incomplete\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1391\u001b[0m \u001b[43m \u001b[49m\u001b[43mdestination_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblob_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1392\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1393\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1394\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1395\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1397\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1398\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1399\u001b[0m _create_symlink(blob_path, pointer_path, new_blob\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1401\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pointer_path\n",
453
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1915\u001b[0m, in \u001b[0;36m_download_to_tmp_and_move\u001b[0;34m(incomplete_path, destination_path, url_to_download, proxies, headers, expected_size, filename, force_download)\u001b[0m\n\u001b[1;32m 1912\u001b[0m _check_disk_space(expected_size, incomplete_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[1;32m 1913\u001b[0m _check_disk_space(expected_size, destination_path\u001b[38;5;241m.\u001b[39mparent)\n\u001b[0;32m-> 1915\u001b[0m \u001b[43mhttp_get\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1916\u001b[0m \u001b[43m \u001b[49m\u001b[43murl_to_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1917\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1918\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1919\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1920\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1921\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexpected_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1922\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1924\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownload complete. Moving file to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdestination_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1925\u001b[0m _chmod_and_move(incomplete_path, destination_path)\n",
454
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:549\u001b[0m, in \u001b[0;36mhttp_get\u001b[0;34m(url, temp_file, proxies, resume_size, headers, expected_size, displayed_filename, _nb_retries, _tqdm_bar)\u001b[0m\n\u001b[1;32m 547\u001b[0m new_resume_size \u001b[38;5;241m=\u001b[39m resume_size\n\u001b[1;32m 548\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 549\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m r\u001b[38;5;241m.\u001b[39miter_content(chunk_size\u001b[38;5;241m=\u001b[39mDOWNLOAD_CHUNK_SIZE):\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk: \u001b[38;5;66;03m# filter out keep-alive new chunks\u001b[39;00m\n\u001b[1;32m 551\u001b[0m progress\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n",
455
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 819\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
456
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:628\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp):\n\u001b[0;32m--> 628\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 630\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n",
457
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:567\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 564\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m--> 567\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 569\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
458
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/urllib3/response.py:533\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 532\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m--> 533\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
459
+ "File \u001b[0;32m/usr/lib/python3.10/http/client.py:466\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 466\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 467\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
460
+ "File \u001b[0;32m/usr/lib/python3.10/socket.py:705\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 705\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
461
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1274\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1274\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
462
+ "File \u001b[0;32m/usr/lib/python3.10/ssl.py:1130\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1128\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1130\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
463
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
464
+ ]
465
+ }
466
+ ],
467
+ "source": [
468
+ "# Load model directly\n",
469
+ "from transformers import AutoTokenizer, AutoModel\n",
470
+ "\n",
471
+ "tokenizer = AutoTokenizer.from_pretrained(\"OpenOpenAI/checkpoint-2500\")\n",
472
+ "model = AutoModel.from_pretrained(\"OpenOpenAI/checkpoint-2500\")"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "code",
477
+ "execution_count": 23,
478
+ "id": "0f8a691a-15be-4ce2-9ea1-8758471f5824",
479
+ "metadata": {},
480
+ "outputs": [
481
+ {
482
+ "name": "stdout",
483
+ "output_type": "stream",
484
+ "text": [
485
+ "PhiModel(\n",
486
+ " (embed_tokens): Embedding(51200, 2048)\n",
487
+ " (embed_dropout): Dropout(p=0.0, inplace=False)\n",
488
+ " (layers): ModuleList(\n",
489
+ " (0-23): 24 x PhiDecoderLayer(\n",
490
+ " (self_attn): PhiAttention(\n",
491
+ " (q_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
492
+ " (k_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
493
+ " (v_proj): Linear(in_features=2048, out_features=2048, bias=True)\n",
494
+ " (dense): Linear(in_features=2048, out_features=2048, bias=True)\n",
495
+ " (rotary_emb): PhiRotaryEmbedding()\n",
496
+ " )\n",
497
+ " (mlp): PhiMLP(\n",
498
+ " (activation_fn): NewGELUActivation()\n",
499
+ " (fc1): Linear(in_features=2048, out_features=8192, bias=True)\n",
500
+ " (fc2): Linear(in_features=8192, out_features=2048, bias=True)\n",
501
+ " )\n",
502
+ " (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
503
+ " (resid_dropout): Dropout(p=0.0, inplace=False)\n",
504
+ " )\n",
505
+ " )\n",
506
+ " (final_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n",
507
+ ")\n"
508
+ ]
509
+ }
510
+ ],
511
+ "source": [
512
+ "print(model)"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "code",
517
+ "execution_count": 13,
518
+ "id": "9da6361a-e291-4a91-a942-676f84292c43",
519
+ "metadata": {},
520
+ "outputs": [
521
+ {
522
+ "ename": "OSError",
523
+ "evalue": "Can't load tokenizer for 'OpenOpenAI/checkpoint-2500'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'OpenOpenAI/checkpoint-2500' is the correct path to a directory containing all relevant files for a CodeGenTokenizerFast tokenizer.",
524
+ "output_type": "error",
525
+ "traceback": [
526
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
527
+ "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
528
+ "Cell \u001b[0;32mIn[13], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModel\n\u001b[1;32m 3\u001b[0m model_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenOpenAI/checkpoint-2500\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Replace with the correct path\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModel\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_name)\n",
529
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:915\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 912\u001b[0m tokenizer_class_py, tokenizer_class_fast \u001b[38;5;241m=\u001b[39m TOKENIZER_MAPPING[\u001b[38;5;28mtype\u001b[39m(config)]\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_fast \u001b[38;5;129;01mand\u001b[39;00m (use_fast \u001b[38;5;129;01mor\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 915\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtokenizer_class_fast\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 917\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
530
+ "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2275\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 2272\u001b[0m \u001b[38;5;66;03m# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be\u001b[39;00m\n\u001b[1;32m 2273\u001b[0m \u001b[38;5;66;03m# loaded directly from the GGUF file.\u001b[39;00m\n\u001b[1;32m 2274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gguf_file:\n\u001b[0;32m-> 2275\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 2276\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2277\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2278\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2279\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2280\u001b[0m )\n\u001b[1;32m 2282\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 2283\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n",
531
+ "\u001b[0;31mOSError\u001b[0m: Can't load tokenizer for 'OpenOpenAI/checkpoint-2500'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'OpenOpenAI/checkpoint-2500' is the correct path to a directory containing all relevant files for a CodeGenTokenizerFast tokenizer."
532
+ ]
533
+ }
534
+ ],
535
+ "source": [
536
+ "from transformers import AutoTokenizer, AutoModel\n",
537
+ "\n",
538
+ "model_name = \"OpenOpenAI/checkpoint-2500\" # Replace with the correct path\n",
539
+ "\n",
540
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
541
+ "model = AutoModel.from_pretrained(model_name)"
542
+ ]
543
+ },
544
+ {
545
+ "cell_type": "code",
546
+ "execution_count": 25,
547
+ "id": "0da6e77a-bfb1-4e24-af67-1b76009f7289",
548
+ "metadata": {},
549
+ "outputs": [
550
+ {
551
+ "name": "stdout",
552
+ "output_type": "stream",
553
+ "text": [
554
+ "4.43.1\n"
555
+ ]
556
+ }
557
+ ],
558
+ "source": [
559
+ "import transformers\n",
560
+ "print(transformers.__version__)"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": null,
566
+ "id": "528f1b39-10d8-4d17-939e-5fb82c06223b",
567
+ "metadata": {},
568
+ "outputs": [],
569
+ "source": []
570
+ }
571
+ ],
572
+ "metadata": {
573
+ "kernelspec": {
574
+ "display_name": "Python 3 (ipykernel)",
575
+ "language": "python",
576
+ "name": "python3"
577
+ },
578
+ "language_info": {
579
+ "codemirror_mode": {
580
+ "name": "ipython",
581
+ "version": 3
582
+ },
583
+ "file_extension": ".py",
584
+ "mimetype": "text/x-python",
585
+ "name": "python",
586
+ "nbconvert_exporter": "python",
587
+ "pygments_lexer": "ipython3",
588
+ "version": "3.10.12"
589
+ }
590
+ },
591
+ "nbformat": 4,
592
+ "nbformat_minor": 5
593
+ }
added_tokens.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50294,
3
+ "\t\t\t": 50293,
4
+ "\t\t\t\t": 50292,
5
+ "\t\t\t\t\t": 50291,
6
+ "\t\t\t\t\t\t": 50290,
7
+ "\t\t\t\t\t\t\t": 50289,
8
+ "\t\t\t\t\t\t\t\t": 50288,
9
+ "\t\t\t\t\t\t\t\t\t": 50287,
10
+ " ": 50286,
11
+ " ": 50285,
12
+ " ": 50284,
13
+ " ": 50283,
14
+ " ": 50282,
15
+ " ": 50281,
16
+ " ": 50280,
17
+ " ": 50279,
18
+ " ": 50278,
19
+ " ": 50277,
20
+ " ": 50276,
21
+ " ": 50275,
22
+ " ": 50274,
23
+ " ": 50273,
24
+ " ": 50272,
25
+ " ": 50271,
26
+ " ": 50270,
27
+ " ": 50269,
28
+ " ": 50268,
29
+ " ": 50267,
30
+ " ": 50266,
31
+ " ": 50265,
32
+ " ": 50264,
33
+ " ": 50263,
34
+ " ": 50262,
35
+ " ": 50261,
36
+ " ": 50260,
37
+ " ": 50259,
38
+ " ": 50258,
39
+ " ": 50257
40
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/phi-1_5",
3
+ "architectures": [
4
+ "PhiForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "embd_pdrop": 0.0,
9
+ "eos_token_id": null,
10
+ "hidden_act": "gelu_new",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 8192,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "phi",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 24,
19
+ "num_key_value_heads": 32,
20
+ "partial_rotary_factor": 0.5,
21
+ "qk_layernorm": false,
22
+ "resid_pdrop": 0.0,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000.0,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.44.0",
28
+ "use_cache": true,
29
+ "vocab_size": 51200
30
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.44.0"
4
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128e396b3aa3c9e172a9ec8d07eacfeb9c787a1fe5a47d14e7665982ca0206bf
3
+ size 4984916152
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d10669ad60fb8ac3a3d03211ba8aa57195c7b112a47190fefe794af0f701482f
3
+ size 688204064
model.safetensors.index.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 5673082880
4
+ },
5
+ "weight_map": {
6
+ "lm_head.bias": "model-00002-of-00002.safetensors",
7
+ "lm_head.weight": "model-00002-of-00002.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
9
+ "model.final_layernorm.bias": "model-00002-of-00002.safetensors",
10
+ "model.final_layernorm.weight": "model-00002-of-00002.safetensors",
11
+ "model.layers.0.input_layernorm.bias": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.0.self_attn.dense.bias": "model-00001-of-00002.safetensors",
18
+ "model.layers.0.self_attn.dense.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
20
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
22
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
24
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.input_layernorm.bias": "model-00001-of-00002.safetensors",
26
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
28
+ "model.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
30
+ "model.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.1.self_attn.dense.bias": "model-00001-of-00002.safetensors",
32
+ "model.layers.1.self_attn.dense.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
34
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
36
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
38
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.10.input_layernorm.bias": "model-00001-of-00002.safetensors",
40
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
42
+ "model.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
44
+ "model.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.10.self_attn.dense.bias": "model-00001-of-00002.safetensors",
46
+ "model.layers.10.self_attn.dense.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
48
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
50
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
52
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.11.input_layernorm.bias": "model-00001-of-00002.safetensors",
54
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
56
+ "model.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
58
+ "model.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.11.self_attn.dense.bias": "model-00001-of-00002.safetensors",
60
+ "model.layers.11.self_attn.dense.weight": "model-00001-of-00002.safetensors",
61
+ "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
62
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
64
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
66
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.12.input_layernorm.bias": "model-00001-of-00002.safetensors",
68
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
70
+ "model.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
72
+ "model.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
73
+ "model.layers.12.self_attn.dense.bias": "model-00001-of-00002.safetensors",
74
+ "model.layers.12.self_attn.dense.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
76
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
78
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
80
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.13.input_layernorm.bias": "model-00001-of-00002.safetensors",
82
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
84
+ "model.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
85
+ "model.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
86
+ "model.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.13.self_attn.dense.bias": "model-00001-of-00002.safetensors",
88
+ "model.layers.13.self_attn.dense.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
90
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
92
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
94
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.14.input_layernorm.bias": "model-00001-of-00002.safetensors",
96
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
97
+ "model.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
98
+ "model.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
100
+ "model.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.14.self_attn.dense.bias": "model-00001-of-00002.safetensors",
102
+ "model.layers.14.self_attn.dense.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
104
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
106
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
108
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
109
+ "model.layers.15.input_layernorm.bias": "model-00001-of-00002.safetensors",
110
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
112
+ "model.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
114
+ "model.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.15.self_attn.dense.bias": "model-00001-of-00002.safetensors",
116
+ "model.layers.15.self_attn.dense.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
118
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
120
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
121
+ "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
122
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.16.input_layernorm.bias": "model-00001-of-00002.safetensors",
124
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
126
+ "model.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
127
+ "model.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
128
+ "model.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
129
+ "model.layers.16.self_attn.dense.bias": "model-00001-of-00002.safetensors",
130
+ "model.layers.16.self_attn.dense.weight": "model-00001-of-00002.safetensors",
131
+ "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
132
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
133
+ "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
134
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
135
+ "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
136
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
138
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
139
+ "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
140
+ "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
141
+ "model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
142
+ "model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
143
+ "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
144
+ "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
145
+ "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
146
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
148
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
+ "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
152
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
154
+ "model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
156
+ "model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
157
+ "model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
158
+ "model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
160
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
162
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
164
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
165
+ "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
166
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
167
+ "model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
168
+ "model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
169
+ "model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
170
+ "model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
171
+ "model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
172
+ "model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
173
+ "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
174
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
175
+ "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
176
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
177
+ "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
178
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
179
+ "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
180
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
181
+ "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
182
+ "model.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
183
+ "model.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
184
+ "model.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
185
+ "model.layers.2.self_attn.dense.bias": "model-00001-of-00002.safetensors",
186
+ "model.layers.2.self_attn.dense.weight": "model-00001-of-00002.safetensors",
187
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
188
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
189
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
190
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
191
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
192
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
193
+ "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
194
+ "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
195
+ "model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
196
+ "model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
197
+ "model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
198
+ "model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
199
+ "model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
200
+ "model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
201
+ "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
202
+ "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
203
+ "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
204
+ "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
205
+ "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
206
+ "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
207
+ "model.layers.21.input_layernorm.bias": "model-00001-of-00002.safetensors",
208
+ "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
209
+ "model.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
210
+ "model.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
211
+ "model.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
212
+ "model.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
213
+ "model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
214
+ "model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
215
+ "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
216
+ "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
217
+ "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
218
+ "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
219
+ "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
220
+ "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
221
+ "model.layers.22.input_layernorm.bias": "model-00002-of-00002.safetensors",
222
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
223
+ "model.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
224
+ "model.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
225
+ "model.layers.22.mlp.fc2.bias": "model-00002-of-00002.safetensors",
226
+ "model.layers.22.mlp.fc2.weight": "model-00002-of-00002.safetensors",
227
+ "model.layers.22.self_attn.dense.bias": "model-00001-of-00002.safetensors",
228
+ "model.layers.22.self_attn.dense.weight": "model-00001-of-00002.safetensors",
229
+ "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
230
+ "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
231
+ "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
232
+ "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
233
+ "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
234
+ "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
235
+ "model.layers.23.input_layernorm.bias": "model-00002-of-00002.safetensors",
236
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
237
+ "model.layers.23.mlp.fc1.bias": "model-00002-of-00002.safetensors",
238
+ "model.layers.23.mlp.fc1.weight": "model-00002-of-00002.safetensors",
239
+ "model.layers.23.mlp.fc2.bias": "model-00002-of-00002.safetensors",
240
+ "model.layers.23.mlp.fc2.weight": "model-00002-of-00002.safetensors",
241
+ "model.layers.23.self_attn.dense.bias": "model-00002-of-00002.safetensors",
242
+ "model.layers.23.self_attn.dense.weight": "model-00002-of-00002.safetensors",
243
+ "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
244
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
245
+ "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
246
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
248
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.layers.3.input_layernorm.bias": "model-00001-of-00002.safetensors",
250
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
251
+ "model.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
252
+ "model.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
253
+ "model.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
254
+ "model.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
255
+ "model.layers.3.self_attn.dense.bias": "model-00001-of-00002.safetensors",
256
+ "model.layers.3.self_attn.dense.weight": "model-00001-of-00002.safetensors",
257
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
258
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
259
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
260
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
261
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
262
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.layers.4.input_layernorm.bias": "model-00001-of-00002.safetensors",
264
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
265
+ "model.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
266
+ "model.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
267
+ "model.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
268
+ "model.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
269
+ "model.layers.4.self_attn.dense.bias": "model-00001-of-00002.safetensors",
270
+ "model.layers.4.self_attn.dense.weight": "model-00001-of-00002.safetensors",
271
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
272
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
273
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
274
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
275
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
276
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
277
+ "model.layers.5.input_layernorm.bias": "model-00001-of-00002.safetensors",
278
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
279
+ "model.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
280
+ "model.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
281
+ "model.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
282
+ "model.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
283
+ "model.layers.5.self_attn.dense.bias": "model-00001-of-00002.safetensors",
284
+ "model.layers.5.self_attn.dense.weight": "model-00001-of-00002.safetensors",
285
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
286
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
287
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
288
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
289
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
290
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
291
+ "model.layers.6.input_layernorm.bias": "model-00001-of-00002.safetensors",
292
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
293
+ "model.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
294
+ "model.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
295
+ "model.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
296
+ "model.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
297
+ "model.layers.6.self_attn.dense.bias": "model-00001-of-00002.safetensors",
298
+ "model.layers.6.self_attn.dense.weight": "model-00001-of-00002.safetensors",
299
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
300
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
301
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
302
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
303
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
304
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
305
+ "model.layers.7.input_layernorm.bias": "model-00001-of-00002.safetensors",
306
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
307
+ "model.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
308
+ "model.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
309
+ "model.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
310
+ "model.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
311
+ "model.layers.7.self_attn.dense.bias": "model-00001-of-00002.safetensors",
312
+ "model.layers.7.self_attn.dense.weight": "model-00001-of-00002.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
315
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
316
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
317
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
318
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
319
+ "model.layers.8.input_layernorm.bias": "model-00001-of-00002.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
321
+ "model.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
322
+ "model.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
323
+ "model.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
324
+ "model.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
325
+ "model.layers.8.self_attn.dense.bias": "model-00001-of-00002.safetensors",
326
+ "model.layers.8.self_attn.dense.weight": "model-00001-of-00002.safetensors",
327
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
328
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
329
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
330
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
331
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
332
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
333
+ "model.layers.9.input_layernorm.bias": "model-00001-of-00002.safetensors",
334
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
335
+ "model.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
336
+ "model.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
337
+ "model.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
338
+ "model.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
339
+ "model.layers.9.self_attn.dense.bias": "model-00001-of-00002.safetensors",
340
+ "model.layers.9.self_attn.dense.weight": "model-00001-of-00002.safetensors",
341
+ "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
342
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
343
+ "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
344
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
346
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors"
347
+ }
348
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78eb917a57d03b385da971fa844e0db8a187a35335e36a1bc0e82e92302dd188
3
+ size 11346461349
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0598b42c3185859e3160b65043f9cfce81ce16cf2ef53e29340c5babc94362
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "50257": {
13
+ "content": " ",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": false
19
+ },
20
+ "50258": {
21
+ "content": " ",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "50259": {
29
+ "content": " ",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "50260": {
37
+ "content": " ",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "50261": {
45
+ "content": " ",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50262": {
53
+ "content": " ",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "50263": {
61
+ "content": " ",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "50264": {
69
+ "content": " ",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "50265": {
77
+ "content": " ",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "50266": {
85
+ "content": " ",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "50267": {
93
+ "content": " ",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "50268": {
101
+ "content": " ",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "50269": {
109
+ "content": " ",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "50270": {
117
+ "content": " ",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "50271": {
125
+ "content": " ",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "50272": {
133
+ "content": " ",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "50273": {
141
+ "content": " ",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50274": {
149
+ "content": " ",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50275": {
157
+ "content": " ",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50276": {
165
+ "content": " ",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50277": {
173
+ "content": " ",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50278": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50279": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50280": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "50281": {
205
+ "content": " ",
206
+ "lstrip": false,
207
+ "normalized": true,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "50282": {
213
+ "content": " ",
214
+ "lstrip": false,
215
+ "normalized": true,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "50283": {
221
+ "content": " ",
222
+ "lstrip": false,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "50284": {
229
+ "content": " ",
230
+ "lstrip": false,
231
+ "normalized": true,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "50285": {
237
+ "content": " ",
238
+ "lstrip": false,
239
+ "normalized": true,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "50286": {
245
+ "content": " ",
246
+ "lstrip": false,
247
+ "normalized": true,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "50287": {
253
+ "content": "\t\t\t\t\t\t\t\t\t",
254
+ "lstrip": false,
255
+ "normalized": true,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "50288": {
261
+ "content": "\t\t\t\t\t\t\t\t",
262
+ "lstrip": false,
263
+ "normalized": true,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "50289": {
269
+ "content": "\t\t\t\t\t\t\t",
270
+ "lstrip": false,
271
+ "normalized": true,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "50290": {
277
+ "content": "\t\t\t\t\t\t",
278
+ "lstrip": false,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "50291": {
285
+ "content": "\t\t\t\t\t",
286
+ "lstrip": false,
287
+ "normalized": true,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "50292": {
293
+ "content": "\t\t\t\t",
294
+ "lstrip": false,
295
+ "normalized": true,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
+ },
300
+ "50293": {
301
+ "content": "\t\t\t",
302
+ "lstrip": false,
303
+ "normalized": true,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": false
307
+ },
308
+ "50294": {
309
+ "content": "\t\t",
310
+ "lstrip": false,
311
+ "normalized": true,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": false
315
+ }
316
+ },
317
+ "bos_token": "<|endoftext|>",
318
+ "clean_up_tokenization_spaces": true,
319
+ "eos_token": "<|endoftext|>",
320
+ "model_max_length": 2048,
321
+ "pad_token": "<|endoftext|>",
322
+ "return_token_type_ids": false,
323
+ "tokenizer_class": "CodeGenTokenizer",
324
+ "unk_token": "<|endoftext|>"
325
+ }
trainer_state.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.5,
5
+ "eval_steps": 500,
6
+ "global_step": 2500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5,
13
+ "grad_norm": 1.8131240606307983,
14
+ "learning_rate": 4.166666666666667e-05,
15
+ "loss": 2.8997,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "grad_norm": 1.7417197227478027,
21
+ "learning_rate": 3.3333333333333335e-05,
22
+ "loss": 2.8097,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 1.5,
27
+ "grad_norm": 2.1385185718536377,
28
+ "learning_rate": 2.5e-05,
29
+ "loss": 1.9273,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "grad_norm": 2.487093448638916,
35
+ "learning_rate": 1.6666666666666667e-05,
36
+ "loss": 1.9087,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 2.5,
41
+ "grad_norm": 2.675701856613159,
42
+ "learning_rate": 8.333333333333334e-06,
43
+ "loss": 1.0467,
44
+ "step": 2500
45
+ }
46
+ ],
47
+ "logging_steps": 500,
48
+ "max_steps": 3000,
49
+ "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 3,
51
+ "save_steps": 500,
52
+ "stateful_callbacks": {
53
+ "TrainerControl": {
54
+ "args": {
55
+ "should_epoch_stop": false,
56
+ "should_evaluate": false,
57
+ "should_log": false,
58
+ "should_save": true,
59
+ "should_training_stop": false
60
+ },
61
+ "attributes": {}
62
+ }
63
+ },
64
+ "total_flos": 4.016131522265088e+16,
65
+ "train_batch_size": 8,
66
+ "trial_name": null,
67
+ "trial_params": null
68
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7a8beb226ab77bdefbae34ce5e59335bbf8ec896ef88a2d2c9eb8bf803cfd1
3
+ size 5368
vocab.json ADDED
The diff for this file is too large to render. See raw diff