,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=[],\n",
"resume_from_checkpoint=None,\n",
"run_name=/content/artifacts/checkpoints,\n",
"save_on_each_node=False,\n",
"save_safetensors=False,\n",
"save_steps=50,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_cpu=False,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.02,\n",
"warmup_steps=100,\n",
"weight_decay=0.0,\n",
")"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from utils import parse_training_args\n",
"\n",
"training_args = parse_training_args(params)\n",
"training_args"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2ae3e5f9-e28e-457b-b6bf-a62a472241bf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T00:39:52.733240Z",
"iopub.status.busy": "2023-10-22T00:39:52.732528Z",
"iopub.status.idle": "2023-10-22T00:39:52.735862Z",
"shell.execute_reply": "2023-10-22T00:39:52.735243Z"
},
"papermill": {
"duration": 1.548798,
"end_time": "2023-10-22T00:39:52.737292",
"exception": false,
"start_time": "2023-10-22T00:39:51.188494",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# data = data[\"train\"].train_test_split(test_size=0.1)\n",
"# data\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5bc91439-6108-445c-8f85-e6558c9f0677",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T00:39:54.811779Z",
"iopub.status.busy": "2023-10-22T00:39:54.811180Z",
"iopub.status.idle": "2023-10-22T00:39:55.100635Z",
"shell.execute_reply": "2023-10-22T00:39:55.099818Z"
},
"papermill": {
"duration": 1.304129,
"end_time": "2023-10-22T00:39:55.102252",
"exception": false,
"start_time": "2023-10-22T00:39:53.798123",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! mkdir -p {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b33e407a-9d4f-49f6-a74b-b80db8cc3a8a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T00:39:57.094945Z",
"iopub.status.busy": "2023-10-22T00:39:57.094146Z",
"iopub.status.idle": "2023-10-22T04:52:40.595862Z",
"shell.execute_reply": "2023-10-22T04:52:40.595167Z"
},
"papermill": {
"duration": 15165.144124,
"end_time": "2023-10-22T04:52:41.273343",
"exception": false,
"start_time": "2023-10-22T00:39:56.129219",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [2391/2391 4:12:34, Epoch 2/3]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 50 | \n",
" 1.069300 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.515300 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.273700 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.173300 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.118800 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.084200 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.065800 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.054500 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.048400 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.044200 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.040000 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.039400 | \n",
"
\n",
" \n",
" 650 | \n",
" 0.038100 | \n",
"
\n",
" \n",
" 700 | \n",
" 0.034100 | \n",
"
\n",
" \n",
" 750 | \n",
" 0.034400 | \n",
"
\n",
" \n",
" 800 | \n",
" 0.032600 | \n",
"
\n",
" \n",
" 850 | \n",
" 0.027300 | \n",
"
\n",
" \n",
" 900 | \n",
" 0.026700 | \n",
"
\n",
" \n",
" 950 | \n",
" 0.027900 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.026800 | \n",
"
\n",
" \n",
" 1050 | \n",
" 0.026300 | \n",
"
\n",
" \n",
" 1100 | \n",
" 0.026900 | \n",
"
\n",
" \n",
" 1150 | \n",
" 0.026100 | \n",
"
\n",
" \n",
" 1200 | \n",
" 0.025400 | \n",
"
\n",
" \n",
" 1250 | \n",
" 0.023900 | \n",
"
\n",
" \n",
" 1300 | \n",
" 0.025000 | \n",
"
\n",
" \n",
" 1350 | \n",
" 0.024000 | \n",
"
\n",
" \n",
" 1400 | \n",
" 0.025600 | \n",
"
\n",
" \n",
" 1450 | \n",
" 0.024300 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.023100 | \n",
"
\n",
" \n",
" 1550 | \n",
" 0.024800 | \n",
"
\n",
" \n",
" 1600 | \n",
" 0.023300 | \n",
"
\n",
" \n",
" 1650 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 1700 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1750 | \n",
" 0.020400 | \n",
"
\n",
" \n",
" 1800 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1850 | \n",
" 0.019300 | \n",
"
\n",
" \n",
" 1900 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1950 | \n",
" 0.018600 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 2050 | \n",
" 0.020000 | \n",
"
\n",
" \n",
" 2100 | \n",
" 0.020300 | \n",
"
\n",
" \n",
" 2150 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 2200 | \n",
" 0.019300 | \n",
"
\n",
" \n",
" 2250 | \n",
" 0.019800 | \n",
"
\n",
" \n",
" 2300 | \n",
" 0.019300 | \n",
"
\n",
" \n",
" 2350 | \n",
" 0.019500 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=2391, training_loss=0.07075354540412868, metrics={'train_runtime': 15162.9574, 'train_samples_per_second': 0.631, 'train_steps_per_second': 0.158, 'total_flos': 3.0420974601928704e+17, 'train_loss': 0.07075354540412868, 'epoch': 3.0})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trainer = transformers.Trainer(\n",
" model=model,\n",
" train_dataset=data[\"train\"],\n",
"# eval_dataset=data[\"test\"],\n",
" args=training_args,\n",
" data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
")\n",
"model.config.use_cache = False # silence the warnings. Please re-enable for inference!\n",
"\n",
"checkpoint_path = Path(\"/content/artifacts/checkpoints\")\n",
"\n",
"# Only set resume_from_checkpoint True when directory exists and contains files\n",
"resume_from_checkpoint = checkpoint_path.is_dir() and any(checkpoint_path.iterdir())\n",
"if resume_from_checkpoint:\n",
" print(\"Resuming from checkpoint:\", list(checkpoint_path.rglob(\"\")))\n",
"trainer.train(resume_from_checkpoint=resume_from_checkpoint)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "172e47a7-400e-4f82-a5e3-38135ecf532f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:52:43.424539Z",
"iopub.status.busy": "2023-10-22T04:52:43.423767Z",
"iopub.status.idle": "2023-10-22T04:53:03.150108Z",
"shell.execute_reply": "2023-10-22T04:53:03.149387Z"
},
"papermill": {
"duration": 21.909882,
"end_time": "2023-10-22T04:53:04.171757",
"exception": false,
"start_time": "2023-10-22T04:52:42.261875",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"PeftModelForCausalLM(\n",
" (base_model): LoraModel(\n",
" (model): LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): ModulesToSaveWrapper(\n",
" (original_module): Embedding(32001, 4096)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Embedding(32001, 4096)\n",
" )\n",
" )\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (k_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (v_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (o_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (up_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (down_proj): Linear(\n",
" in_features=11008, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=11008, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): ModulesToSaveWrapper(\n",
" (original_module): Linear(in_features=4096, out_features=32001, bias=False)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=32001, bias=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path_lora)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dea4e68e-57a7-48bd-bad9-f03dfe3f8a06",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:53:06.109528Z",
"iopub.status.busy": "2023-10-22T04:53:06.108736Z",
"iopub.status.idle": "2023-10-22T04:53:06.356699Z",
"shell.execute_reply": "2023-10-22T04:53:06.355856Z"
},
"papermill": {
"duration": 1.205767,
"end_time": "2023-10-22T04:53:06.358311",
"exception": false,
"start_time": "2023-10-22T04:53:05.152544",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 1.2G\r\n",
" 512 -rw-r--r-- 1 root 3003 88 Oct 22 04:52 README.md\r\n",
"1.0K -rw-r--r-- 1 root 3003 550 Oct 22 04:53 adapter_config.json\r\n",
"1.2G -rw-r--r-- 1 root 3003 1.2G Oct 22 04:52 adapter_model.bin\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "09db36b7-ead6-4368-9bfb-13ba1ba800a5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:53:08.298375Z",
"iopub.status.busy": "2023-10-22T04:53:08.297543Z",
"iopub.status.idle": "2023-10-22T04:54:00.325080Z",
"shell.execute_reply": "2023-10-22T04:54:00.324374Z"
},
"papermill": {
"duration": 54.039738,
"end_time": "2023-10-22T04:54:01.415212",
"exception": false,
"start_time": "2023-10-22T04:53:07.375474",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): Embedding(32001, 4096)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=32001, bias=False)\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = model.merge_and_unload().half()\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "270a9a72-3a12-4d83-aa7d-2d167cb28cb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:54:03.491633Z",
"iopub.status.busy": "2023-10-22T04:54:03.490944Z",
"iopub.status.idle": "2023-10-22T04:54:03.732210Z",
"shell.execute_reply": "2023-10-22T04:54:03.731396Z"
},
"papermill": {
"duration": 1.235829,
"end_time": "2023-10-22T04:54:03.733715",
"exception": false,
"start_time": "2023-10-22T04:54:02.497886",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 0\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 22 00:39 checkpoints\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 22 00:39 lora\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 22 00:33 src\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -l {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "260e9d79-6eb8-4516-bf8f-825a25606391",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:54:05.862618Z",
"iopub.status.busy": "2023-10-22T04:54:05.861702Z",
"iopub.status.idle": "2023-10-22T04:56:43.569718Z",
"shell.execute_reply": "2023-10-22T04:56:43.569054Z"
},
"papermill": {
"duration": 159.765833,
"end_time": "2023-10-22T04:56:44.594302",
"exception": false,
"start_time": "2023-10-22T04:54:04.828469",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"('/content/artifacts/tokenizer_config.json',\n",
" '/content/artifacts/special_tokens_map.json',\n",
" '/content/artifacts/tokenizer.model',\n",
" '/content/artifacts/added_tokens.json',\n",
" '/content/artifacts/tokenizer.json')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path)\n",
"tokenizer.save_pretrained(trained_model_path)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6d90a920-fb22-4291-8466-411ff41e31be",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:56:46.484015Z",
"iopub.status.busy": "2023-10-22T04:56:46.483256Z",
"iopub.status.idle": "2023-10-22T04:56:46.762038Z",
"shell.execute_reply": "2023-10-22T04:56:46.761255Z"
},
"papermill": {
"duration": 1.264259,
"end_time": "2023-10-22T04:56:46.763647",
"exception": false,
"start_time": "2023-10-22T04:56:45.499388",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
" 512 -rw-r--r-- 1 root 3003 21 Oct 22 04:56 added_tokens.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 22 00:39 checkpoints\r\n",
"1.0K -rw-r--r-- 1 root 3003 648 Oct 22 04:54 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 183 Oct 22 04:54 generation_config.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 22 00:39 lora\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 22 04:54 pytorch_model-00001-of-00002.bin\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 22 04:56 pytorch_model-00002-of-00002.bin\r\n",
" 24K -rw-r--r-- 1 root 3003 24K Oct 22 04:56 pytorch_model.bin.index.json\r\n",
"1.0K -rw-r--r-- 1 root 3003 552 Oct 22 04:56 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 22 00:33 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 22 04:56 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 22 04:56 tokenizer.model\r\n",
"1.5K -rw-r--r-- 1 root 3003 1.1K Oct 22 04:56 tokenizer_config.json\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "202a694a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-22T04:56:48.598532Z",
"iopub.status.busy": "2023-10-22T04:56:48.597715Z"
},
"papermill": {
"duration": null,
"end_time": null,
"exception": false,
"start_time": "2023-10-22T04:56:47.688302",
"status": "running"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fd6c61a8f74449ab8cd067d50d2265ad",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7fdcfa71a5bf459f8a08db3d2d018d9f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d8a26f4fedf4be697918941db33850b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from huggingface_hub import HfApi\n",
"import shutil\n",
"\n",
"tokenizer_model_path_base = Path(model_path) / \"tokenizer.model\"\n",
"tokenizer_model_path_trained = Path(trained_model_path) / \"tokenizer.model\"\n",
"if tokenizer_model_path_base.exists() and not tokenizer_model_path_trained.exists():\n",
" shutil.copy(tokenizer_model_path_base, tokenizer_model_path_trained)\n",
"\n",
"repo_id = params.get(\"push_to_hub\")\n",
"if repo_id:\n",
" model.push_to_hub(repo_id)\n",
" tokenizer.push_to_hub(repo_id)\n",
" hf_api = HfApi()\n",
" # Upload tokenizer.model if it was in base model\n",
" if tokenizer_model_path_base.exists():\n",
" hf_api.upload_file(\n",
" path_or_fileobj=tokenizer_model_path_base,\n",
" path_in_repo=tokenizer_model_path_base.name,\n",
" repo_id=repo_id,\n",
" )\n",
" logs_path = Path(\"/content/artifacts/src/train.ipynb\")\n",
" if logs_path.exists():\n",
" hf_api.upload_file(\n",
" path_or_fileobj=logs_path,\n",
" path_in_repo=logs_path.name,\n",
" repo_id=repo_id,\n",
" )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": null,
"end_time": null,
"environment_variables": {},
"exception": null,
"input_path": "/content/src/train.ipynb",
"output_path": "/content/artifacts/src/train.ipynb",
"parameters": {},
"start_time": "2023-10-22T00:33:15.528186",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}