,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=[],\n",
"resume_from_checkpoint=None,\n",
"run_name=/content/artifacts/checkpoints,\n",
"save_on_each_node=False,\n",
"save_safetensors=False,\n",
"save_steps=50,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_cpu=False,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.02,\n",
"warmup_steps=100,\n",
"weight_decay=0.0,\n",
")"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from utils import parse_training_args\n",
"\n",
"training_args = parse_training_args(params)\n",
"training_args"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2ae3e5f9-e28e-457b-b6bf-a62a472241bf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T05:55:53.899792Z",
"iopub.status.busy": "2023-10-21T05:55:53.899027Z",
"iopub.status.idle": "2023-10-21T05:55:53.902455Z",
"shell.execute_reply": "2023-10-21T05:55:53.901834Z"
},
"papermill": {
"duration": 1.100827,
"end_time": "2023-10-21T05:55:53.903903",
"exception": false,
"start_time": "2023-10-21T05:55:52.803076",
"status": "completed"
},
"tags": []
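},
"outputs": [],
"source": [
"# Optional hold-out split, currently disabled: the Trainer cell below trains on data[\"train\"] only.\n",
"# To evaluate during training, uncomment these lines together with `eval_dataset` in that cell.\n",
"# data = data[\"train\"].train_test_split(test_size=0.1)\n",
"# data\n"
]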
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5bc91439-6108-445c-8f85-e6558c9f0677",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T05:55:56.641848Z",
"iopub.status.busy": "2023-10-21T05:55:56.641104Z",
"iopub.status.idle": "2023-10-21T05:55:56.906471Z",
"shell.execute_reply": "2023-10-21T05:55:56.905658Z"
},
"papermill": {
"duration": 1.310772,
"end_time": "2023-10-21T05:55:56.908127",
"exception": false,
"start_time": "2023-10-21T05:55:55.597355",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! mkdir -p {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b33e407a-9d4f-49f6-a74b-b80db8cc3a8a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T05:55:58.965151Z",
"iopub.status.busy": "2023-10-21T05:55:58.964343Z",
"iopub.status.idle": "2023-10-21T07:47:28.532976Z",
"shell.execute_reply": "2023-10-21T07:47:28.532251Z"
},
"papermill": {
"duration": 6690.621815,
"end_time": "2023-10-21T07:47:28.534533",
"exception": false,
"start_time": "2023-10-21T05:55:57.912718",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [1119/1119 1:51:20, Epoch 2/3]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 50 | \n",
" 1.057700 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.489600 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.260400 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.147400 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.081700 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.057000 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.041700 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.037400 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.033400 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.029200 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.027600 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.027100 | \n",
"
\n",
" \n",
" 650 | \n",
" 0.025000 | \n",
"
\n",
" \n",
" 700 | \n",
" 0.024600 | \n",
"
\n",
" \n",
" 750 | \n",
" 0.024500 | \n",
"
\n",
" \n",
" 800 | \n",
" 0.020400 | \n",
"
\n",
" \n",
" 850 | \n",
" 0.020500 | \n",
"
\n",
" \n",
" 900 | \n",
" 0.020800 | \n",
"
\n",
" \n",
" 950 | \n",
" 0.020800 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.020900 | \n",
"
\n",
" \n",
" 1050 | \n",
" 0.021100 | \n",
"
\n",
" \n",
" 1100 | \n",
" 0.020300 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=1119, training_loss=0.11247422747893245, metrics={'train_runtime': 6689.036, 'train_samples_per_second': 0.67, 'train_steps_per_second': 0.167, 'total_flos': 1.4445806420565197e+17, 'train_loss': 0.11247422747893245, 'epoch': 3.0})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trainer = transformers.Trainer(\n",
" model=model,\n",
" train_dataset=data[\"train\"],\n",
"# eval_dataset=data[\"test\"],\n",
" args=training_args,\n",
" data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
")\n",
"model.config.use_cache = False # silence the warnings. Please re-enable for inference!\n",
"\n",
"checkpoint_path = Path(\"/content/artifacts/checkpoints\")\n",
"\n",
"# Only set resume_from_checkpoint True when directory exists and contains files\n",
"resume_from_checkpoint = checkpoint_path.is_dir() and any(checkpoint_path.iterdir())\n",
"if resume_from_checkpoint:\n",
" print(\"Resuming from checkpoint:\", list(checkpoint_path.rglob(\"\")))\n",
"trainer.train(resume_from_checkpoint=resume_from_checkpoint)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "172e47a7-400e-4f82-a5e3-38135ecf532f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:47:30.428814Z",
"iopub.status.busy": "2023-10-21T07:47:30.428055Z",
"iopub.status.idle": "2023-10-21T07:47:46.873882Z",
"shell.execute_reply": "2023-10-21T07:47:46.873193Z"
},
"papermill": {
"duration": 17.445662,
"end_time": "2023-10-21T07:47:46.875405",
"exception": false,
"start_time": "2023-10-21T07:47:29.429743",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"PeftModelForCausalLM(\n",
" (base_model): LoraModel(\n",
" (model): LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): ModulesToSaveWrapper(\n",
" (original_module): Embedding(32001, 4096)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Embedding(32001, 4096)\n",
" )\n",
" )\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (k_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (v_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (o_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (up_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (down_proj): Linear(\n",
" in_features=11008, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=11008, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): ModulesToSaveWrapper(\n",
" (original_module): Linear(in_features=4096, out_features=32001, bias=False)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=32001, bias=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path_lora)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dea4e68e-57a7-48bd-bad9-f03dfe3f8a06",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:47:48.699946Z",
"iopub.status.busy": "2023-10-21T07:47:48.699212Z",
"iopub.status.idle": "2023-10-21T07:47:48.949489Z",
"shell.execute_reply": "2023-10-21T07:47:48.948666Z"
},
"papermill": {
"duration": 1.175557,
"end_time": "2023-10-21T07:47:48.950963",
"exception": false,
"start_time": "2023-10-21T07:47:47.775406",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 1.2G\r\n",
" 512 -rw-r--r-- 1 root 3003 88 Oct 21 07:47 README.md\r\n",
"1.0K -rw-r--r-- 1 root 3003 550 Oct 21 07:47 adapter_config.json\r\n",
"1.2G -rw-r--r-- 1 root 3003 1.2G Oct 21 07:47 adapter_model.bin\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "09db36b7-ead6-4368-9bfb-13ba1ba800a5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:47:50.799568Z",
"iopub.status.busy": "2023-10-21T07:47:50.798899Z",
"iopub.status.idle": "2023-10-21T07:48:42.484011Z",
"shell.execute_reply": "2023-10-21T07:48:42.483286Z"
},
"papermill": {
"duration": 53.672087,
"end_time": "2023-10-21T07:48:43.522023",
"exception": false,
"start_time": "2023-10-21T07:47:49.849936",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): Embedding(32001, 4096)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=32001, bias=False)\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = model.merge_and_unload().half()\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "270a9a72-3a12-4d83-aa7d-2d167cb28cb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:48:45.317087Z",
"iopub.status.busy": "2023-10-21T07:48:45.316765Z",
"iopub.status.idle": "2023-10-21T07:48:45.559747Z",
"shell.execute_reply": "2023-10-21T07:48:45.558874Z"
},
"papermill": {
"duration": 1.11534,
"end_time": "2023-10-21T07:48:45.561396",
"exception": false,
"start_time": "2023-10-21T07:48:44.446056",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 0\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 21 05:55 checkpoints\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 21 05:55 lora\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 21 05:49 src\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -l {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "260e9d79-6eb8-4516-bf8f-825a25606391",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:48:47.429854Z",
"iopub.status.busy": "2023-10-21T07:48:47.429062Z",
"iopub.status.idle": "2023-10-21T07:51:23.634942Z",
"shell.execute_reply": "2023-10-21T07:51:23.634264Z"
},
"papermill": {
"duration": 158.141645,
"end_time": "2023-10-21T07:51:24.665966",
"exception": false,
"start_time": "2023-10-21T07:48:46.524321",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"('/content/artifacts/tokenizer_config.json',\n",
" '/content/artifacts/special_tokens_map.json',\n",
" '/content/artifacts/tokenizer.model',\n",
" '/content/artifacts/added_tokens.json',\n",
" '/content/artifacts/tokenizer.json')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path)\n",
"tokenizer.save_pretrained(trained_model_path)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6d90a920-fb22-4291-8466-411ff41e31be",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:51:26.557278Z",
"iopub.status.busy": "2023-10-21T07:51:26.556503Z",
"iopub.status.idle": "2023-10-21T07:51:26.796901Z",
"shell.execute_reply": "2023-10-21T07:51:26.796120Z"
},
"papermill": {
"duration": 1.217017,
"end_time": "2023-10-21T07:51:26.798456",
"exception": false,
"start_time": "2023-10-21T07:51:25.581439",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
" 512 -rw-r--r-- 1 root 3003 21 Oct 21 07:51 added_tokens.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 21 05:55 checkpoints\r\n",
"1.0K -rw-r--r-- 1 root 3003 648 Oct 21 07:48 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 183 Oct 21 07:48 generation_config.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 21 05:55 lora\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 21 07:49 pytorch_model-00001-of-00002.bin\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 21 07:50 pytorch_model-00002-of-00002.bin\r\n",
" 24K -rw-r--r-- 1 root 3003 24K Oct 21 07:51 pytorch_model.bin.index.json\r\n",
"1.0K -rw-r--r-- 1 root 3003 552 Oct 21 07:51 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 21 05:49 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 21 07:51 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 21 07:51 tokenizer.model\r\n",
"1.5K -rw-r--r-- 1 root 3003 1.1K Oct 21 07:51 tokenizer_config.json\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "202a694a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-21T07:51:28.659968Z",
"iopub.status.busy": "2023-10-21T07:51:28.659180Z"
},
"papermill": {
"duration": null,
"end_time": null,
"exception": false,
"start_time": "2023-10-21T07:51:27.744359",
"status": "running"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "671a3174ba724efd8ea3d2b141b75b98",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f1a97636b33c4485bb557cf32d97950f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "268a578f3d774938990b28374f4d3957",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from huggingface_hub import HfApi\n",
"import shutil\n",
"\n",
"tokenizer_model_path_base = Path(model_path) / \"tokenizer.model\"\n",
"tokenizer_model_path_trained = Path(trained_model_path) / \"tokenizer.model\"\n",
"if tokenizer_model_path_base.exists() and not tokenizer_model_path_trained.exists():\n",
" shutil.copy(tokenizer_model_path_base, tokenizer_model_path_trained)\n",
"\n",
"repo_id = params.get(\"push_to_hub\")\n",
"if repo_id:\n",
" model.push_to_hub(repo_id)\n",
" tokenizer.push_to_hub(repo_id)\n",
" hf_api = HfApi()\n",
" # Upload tokenizer.model if it was in base model\n",
" if tokenizer_model_path_base.exists():\n",
" hf_api.upload_file(\n",
" path_or_fileobj=tokenizer_model_path_base,\n",
" path_in_repo=tokenizer_model_path_base.name,\n",
" repo_id=repo_id,\n",
" )\n",
" logs_path = Path(\"/content/artifacts/src/train.ipynb\")\n",
" if logs_path.exists():\n",
" hf_api.upload_file(\n",
" path_or_fileobj=logs_path,\n",
" path_in_repo=logs_path.name,\n",
" repo_id=repo_id,\n",
" )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": null,
"end_time": null,
"environment_variables": {},
"exception": null,
"input_path": "/content/src/train.ipynb",
"output_path": "/content/artifacts/src/train.ipynb",
"parameters": {},
"start_time": "2023-10-21T05:49:13.952191",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}