{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a61725a4-1744-40af-8895-c092b447f315",
   "metadata": {},
   "outputs": [],
   "source": [
    "#pip install transformers datasets evaluate seqeval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bfbe03a5-8199-4fa6-851f-bdb3bc2069bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5299508821ac4190be635084237878d1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b33ad12c-0487-4eff-995d-d3e27756c1af",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading builder script: 100%|█████████████████████████████████████████████████████████| 7.46k/7.46k [00:00<?, ?B/s]\n",
      "Downloading metadata: 100%|███████████████████████████████████████████████████████████████| 4.28k/4.28k [00:00<?, ?B/s]\n",
      "Downloading readme: 100%|█████████████████████████████████████████████████████████████████| 9.05k/9.05k [00:00<?, ?B/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading and preparing dataset wnut_17/wnut_17 to C:/Users/Vadim/.cache/huggingface/datasets/wnut_17/wnut_17/1.0.0/077c7f08b8dbc800692e8c9186cdf3606d5849ab0e7be662e6135bb10eba54f9...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading data files:   0%|                                                                    | 0/3 [00:00<?, ?it/s]\n",
      "Downloading data: 494kB [00:00, 6.32MB/s]                                                                              \u001b[A\n",
      "Downloading data files:  33%|████████████████████                                        | 1/3 [00:00<00:01,  1.57it/s]\n",
      "Downloading data: 115kB [00:00, 38.2MB/s]                                                                              \u001b[A\n",
      "Downloading data files:  67%|████████████████████████████████████████                    | 2/3 [00:01<00:00,  1.88it/s]\n",
      "Downloading data: 192kB [00:00, 3.27MB/s]                                                                              \u001b[A\n",
      "Downloading data files: 100%|████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  1.80it/s]\n",
      "Extracting data files: 100%|█████████████████████████████████████████████████████████████████████| 3/3 [00:00<?, ?it/s]\n",
      "                                                                                                                       \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset wnut_17 downloaded and prepared to C:/Users/Vadim/.cache/huggingface/datasets/wnut_17/wnut_17/1.0.0/077c7f08b8dbc800692e8c9186cdf3606d5849ab0e7be662e6135bb10eba54f9. Subsequent calls will reuse this data.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 179.69it/s]\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "wnut = load_dataset(\"wnut_17\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "66c9ae43-ac1b-496b-893e-b916c3df0f2f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': '0',\n",
       " 'tokens': ['@paulwalk',\n",
       "  'It',\n",
       "  \"'s\",\n",
       "  'the',\n",
       "  'view',\n",
       "  'from',\n",
       "  'where',\n",
       "  'I',\n",
       "  \"'m\",\n",
       "  'living',\n",
       "  'for',\n",
       "  'two',\n",
       "  'weeks',\n",
       "  '.',\n",
       "  'Empire',\n",
       "  'State',\n",
       "  'Building',\n",
       "  '=',\n",
       "  'ESB',\n",
       "  '.',\n",
       "  'Pretty',\n",
       "  'bad',\n",
       "  'storm',\n",
       "  'here',\n",
       "  'last',\n",
       "  'evening',\n",
       "  '.'],\n",
       " 'ner_tags': [0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  7,\n",
       "  8,\n",
       "  8,\n",
       "  0,\n",
       "  7,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wnut[\"train\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "63a65eb5-eee0-451e-ad7c-a974f8e1835c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['O',\n",
       " 'B-corporation',\n",
       " 'I-corporation',\n",
       " 'B-creative-work',\n",
       " 'I-creative-work',\n",
       " 'B-group',\n",
       " 'I-group',\n",
       " 'B-location',\n",
       " 'I-location',\n",
       " 'B-person',\n",
       " 'I-person',\n",
       " 'B-product',\n",
       " 'I-product']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label_list = wnut[\"train\"].features[f\"ner_tags\"].feature.names\n",
    "label_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5ca8689a-addf-4a10-a964-88f73e87b599",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading (…)okenizer_config.json: 100%|██████████████████████████████████████████████████| 28.0/28.0 [00:00<?, ?B/s]\n",
      "C:\\Users\\Vadim\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\Vadim\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
      "  warnings.warn(message)\n",
      "Downloading (…)lve/main/config.json: 100%|████████████████████████████████████████████████████| 483/483 [00:00<?, ?B/s]\n",
      "Downloading (…)solve/main/vocab.txt: 100%|██████████████████████████████████████████| 232k/232k [00:00<00:00, 1.48MB/s]\n",
      "Downloading (…)/main/tokenizer.json: 100%|██████████████████████████████████████████| 466k/466k [00:00<00:00, 5.94MB/s]\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ca02c0f1-25b0-4f8b-9f46-d4feb4e0c005",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['[CLS]',\n",
       " '@',\n",
       " 'paul',\n",
       " '##walk',\n",
       " 'it',\n",
       " \"'\",\n",
       " 's',\n",
       " 'the',\n",
       " 'view',\n",
       " 'from',\n",
       " 'where',\n",
       " 'i',\n",
       " \"'\",\n",
       " 'm',\n",
       " 'living',\n",
       " 'for',\n",
       " 'two',\n",
       " 'weeks',\n",
       " '.',\n",
       " 'empire',\n",
       " 'state',\n",
       " 'building',\n",
       " '=',\n",
       " 'es',\n",
       " '##b',\n",
       " '.',\n",
       " 'pretty',\n",
       " 'bad',\n",
       " 'storm',\n",
       " 'here',\n",
       " 'last',\n",
       " 'evening',\n",
       " '.',\n",
       " '[SEP]']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "example = wnut[\"train\"][0]\n",
    "\n",
    "tokenized_input = tokenizer(example[\"tokens\"], is_split_into_words=True)\n",
    "\n",
    "tokens = tokenizer.convert_ids_to_tokens(tokenized_input[\"input_ids\"])\n",
    "\n",
    "tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "05f09ac8-b94d-4a58-b767-7ddbebc8febf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenize_and_align_labels(examples):\n",
    "    tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n",
    "    labels = []\n",
    "    for i, label in enumerate(examples[f\"ner_tags\"]):\n",
    "        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.\n",
    "        previous_word_idx = None\n",
    "        label_ids = []\n",
    "        for word_idx in word_ids:  # Set the special tokens to -100.\n",
    "            if word_idx is None:\n",
    "                label_ids.append(-100)\n",
    "            elif word_idx != previous_word_idx:  # Only label the first token of a given word.\n",
    "                label_ids.append(label[word_idx])\n",
    "            else:\n",
    "                label_ids.append(-100)\n",
    "            previous_word_idx = word_idx\n",
    "        labels.append(label_ids)\n",
    "    tokenized_inputs[\"labels\"] = labels\n",
    "    return tokenized_inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9835d91b-2811-47dc-93ae-f7e491f5f1cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                                                       \r"
     ]
    }
   ],
   "source": [
    "tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fb7c2ed2-7254-46c0-a1bf-49c598590b38",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForTokenClassification\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "3203fca2-fd00-4356-831d-df30a0b3f3ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForTokenClassification\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors=\"tf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0e730d06-b1a6-4a47-a6ed-3ea23e229e3d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading builder script: 100%|█████████████████████████████████████████████████| 6.34k/6.34k [00:00<00:00, 1.41MB/s]\n"
     ]
    }
   ],
   "source": [
    "import evaluate\n",
    "seqeval = evaluate.load(\"seqeval\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aadb66dc-eab3-43c2-947f-4630d3320ed6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "labels = [label_list[i] for i in example[f\"ner_tags\"]]\n",
    "\n",
    "def compute_metrics(p):\n",
    "    predictions, labels = p\n",
    "    predictions = np.argmax(predictions, axis=2)\n",
    "    true_predictions = [\n",
    "        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
    "        for prediction, label in zip(predictions, labels)\n",
    "    ]\n",
    "    true_labels = [\n",
    "        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n",
    "        for prediction, label in zip(predictions, labels)\n",
    "    ]\n",
    "\n",
    "    results = seqeval.compute(predictions=true_predictions, references=true_labels)\n",
    "    return {\n",
    "        \"precision\": results[\"overall_precision\"],\n",
    "        \"recall\": results[\"overall_recall\"],\n",
    "        \"f1\": results[\"overall_f1\"],\n",
    "        \"accuracy\": results[\"overall_accuracy\"],\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "22de9a50-d138-49a5-9604-b02bbcac291d",
   "metadata": {},
   "outputs": [],
   "source": [
    "id2label = {\n",
    "    0: \"O\",\n",
    "    1: \"B-corporation\",\n",
    "    2: \"I-corporation\",\n",
    "    3: \"B-creative-work\",\n",
    "    4: \"I-creative-work\",\n",
    "    5: \"B-group\",\n",
    "    6: \"I-group\",\n",
    "    7: \"B-location\",\n",
    "    8: \"I-location\",\n",
    "    9: \"B-person\",\n",
    "    10: \"I-person\",\n",
    "    11: \"B-product\",\n",
    "    12: \"I-product\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d3c554a6-6a04-41ca-ac83-98fb20f605c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "label2id = {\n",
    "    \"O\": 0,\n",
    "    \"B-corporation\": 1,\n",
    "    \"I-corporation\": 2,\n",
    "    \"B-creative-work\": 3,\n",
    "    \"I-creative-work\": 4,\n",
    "    \"B-group\": 5,\n",
    "    \"I-group\": 6,\n",
    "    \"B-location\": 7,\n",
    "    \"I-location\": 8,\n",
    "    \"B-person\": 9,\n",
    "    \"I-person\": 10,\n",
    "    \"B-product\": 11,\n",
    "    \"I-product\": 12,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "5bc96c56-d36c-4e0f-af8a-b0495e6a118a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading pytorch_model.bin: 100%|████████████████████████████████████████████████| 268M/268M [00:23<00:00, 11.4MB/s]\n",
      "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']\n",
      "- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\n",
    "\n",
    "model = AutoModelForTokenClassification.from_pretrained(\n",
    "    \"distilbert-base-uncased\", num_labels=13, id2label=id2label, label2id=label2id\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6a26e0b3-6432-44f4-b4ea-7c628402cc1f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "PyTorch: setting up devices\n",
      "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
     ]
    }
   ],
   "source": [
    "training_args = TrainingArguments(\n",
    "    output_dir=\"my_awesome_wnut_model\",\n",
    "    learning_rate=2e-5,\n",
    "    per_device_train_batch_size=16,\n",
    "    per_device_eval_batch_size=16,\n",
    "    num_train_epochs=2,\n",
    "    weight_decay=0.01,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    save_strategy=\"epoch\",\n",
    "    load_best_model_at_end=True,\n",
    "    push_to_hub=False,\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=tokenized_wnut[\"train\"],\n",
    "    eval_dataset=tokenized_wnut[\"test\"],\n",
    "    tokenizer=tokenizer,\n",
    "    data_collator=data_collator,\n",
    "    compute_metrics=compute_metrics,\n",
    ")\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "adbdec19-9da9-435a-9a48-b46e39ce99f1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The following columns in the training set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.\n",
      "***** Running training *****\n",
      "  Num examples = 3394\n",
      "  Num Epochs = 2\n",
      "  Instantaneous batch size per device = 16\n",
      "  Total train batch size (w. parallel, distributed & accumulation) = 16\n",
      "  Gradient Accumulation steps = 1\n",
      "  Total optimization steps = 426\n",
      "  Number of trainable parameters = 66372877\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "EagerTensor object has no attribute 'size'. \n        If you are looking for numpy-related methods, please run the following:\n        from tensorflow.python.ops.numpy_ops import np_config\n        np_config.enable_numpy_behavior()\n      ",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Input \u001b[1;32mIn [27]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:1501\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m   1496\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[0;32m   1498\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[0;32m   1499\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[0;32m   1500\u001b[0m )\n\u001b[1;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1502\u001b[0m \u001b[43m    \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1503\u001b[0m \u001b[43m    \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1504\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1505\u001b[0m \u001b[43m    \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1506\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:1749\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m   1747\u001b[0m         tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[0;32m   1748\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1749\u001b[0m     tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1751\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m   1752\u001b[0m     args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m   1753\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m   1754\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[0;32m   1755\u001b[0m ):\n\u001b[0;32m   1756\u001b[0m     \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m   1757\u001b[0m     tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:2508\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[1;34m(self, model, inputs)\u001b[0m\n\u001b[0;32m   2505\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m   2507\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[1;32m-> 2508\u001b[0m     loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   2510\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mn_gpu \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m   2511\u001b[0m     loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean()  \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:2540\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[1;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[0;32m   2538\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   2539\u001b[0m     labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 2540\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n\u001b[0;32m   2541\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[0;32m   2542\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[0;32m   2543\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\torch\\nn\\modules\\module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m   1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1193\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1194\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28minput\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m   1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[0;32m   1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\models\\distilbert\\modeling_distilbert.py:978\u001b[0m, in \u001b[0;36mDistilBertForTokenClassification.forward\u001b[1;34m(self, input_ids, attention_mask, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[0;32m    972\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    973\u001b[0m \u001b[38;5;124;03mlabels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):\u001b[39;00m\n\u001b[0;32m    974\u001b[0m \u001b[38;5;124;03m    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.\u001b[39;00m\n\u001b[0;32m    975\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    976\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[1;32m--> 978\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdistilbert\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    979\u001b[0m \u001b[43m    \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    980\u001b[0m \u001b[43m    \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    981\u001b[0m \u001b[43m    \u001b[49m\u001b[43mhead_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    982\u001b[0m \u001b[43m    \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    983\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    984\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    985\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    986\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    988\u001b[0m sequence_output \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m    990\u001b[0m sequence_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdropout(sequence_output)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\torch\\nn\\modules\\module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m   1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1193\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1194\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28minput\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m   1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[0;32m   1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\models\\distilbert\\modeling_distilbert.py:551\u001b[0m, in \u001b[0;36mDistilBertModel.forward\u001b[1;34m(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[0;32m    549\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou cannot specify both input_ids and inputs_embeds at the same time\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    550\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m input_ids \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 551\u001b[0m     input_shape \u001b[38;5;241m=\u001b[39m \u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m()\n\u001b[0;32m    552\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m inputs_embeds \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    553\u001b[0m     input_shape \u001b[38;5;241m=\u001b[39m inputs_embeds\u001b[38;5;241m.\u001b[39msize()[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\tensorflow\\python\\framework\\ops.py:440\u001b[0m, in \u001b[0;36mTensor.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m    436\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getattr__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name):\n\u001b[0;32m    437\u001b[0m   \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mravel\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtranspose\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreshape\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclip\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msize\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    438\u001b[0m               \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtolist\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\"\u001b[39m}:\n\u001b[0;32m    439\u001b[0m     \u001b[38;5;66;03m# TODO(wangpeng): Export the enable_numpy_behavior knob\u001b[39;00m\n\u001b[1;32m--> 440\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[0;32m    441\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m    442\u001b[0m \u001b[38;5;124m      If you are looking for numpy-related methods, please run the following:\u001b[39m\n\u001b[0;32m    443\u001b[0m \u001b[38;5;124m      from tensorflow.python.ops.numpy_ops import np_config\u001b[39m\n\u001b[0;32m    444\u001b[0m \u001b[38;5;124m      np_config.enable_numpy_behavior()\u001b[39m\n\u001b[0;32m    445\u001b[0m \u001b[38;5;124m    \u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m)\n\u001b[0;32m    446\u001b[0m   \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattribute__\u001b[39m(name)\n",
      "\u001b[1;31mAttributeError\u001b[0m: EagerTensor object has no attribute 'size'. \n        If you are looking for numpy-related methods, please run the following:\n        from tensorflow.python.ops.numpy_ops import np_config\n        np_config.enable_numpy_behavior()\n      "
     ]
    }
   ],
   "source": [
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "5016c257-74e3-4aad-b6cc-7706b4fc367d",
   "metadata": {},
   "outputs": [
    {
     "ename": "LocalTokenNotFoundError",
     "evalue": "Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mLocalTokenNotFoundError\u001b[0m                   Traceback (most recent call last)",
      "Input \u001b[1;32mIn [28]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:3431\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[1;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[0;32m   3428\u001b[0m \u001b[38;5;66;03m# If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, we try to create the repo but\u001b[39;00m\n\u001b[0;32m   3429\u001b[0m \u001b[38;5;66;03m# it might fail.\u001b[39;00m\n\u001b[0;32m   3430\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrepo\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 3431\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit_git_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3433\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n\u001b[0;32m   3434\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_model_id \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:3281\u001b[0m, in \u001b[0;36mTrainer.init_git_repo\u001b[1;34m(self, at_init)\u001b[0m\n\u001b[0;32m   3279\u001b[0m     repo_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_model_id\n\u001b[0;32m   3280\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m repo_name:\n\u001b[1;32m-> 3281\u001b[0m     repo_name \u001b[38;5;241m=\u001b[39m \u001b[43mget_full_repo_name\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrepo_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhub_token\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3283\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m   3284\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo \u001b[38;5;241m=\u001b[39m Repository(\n\u001b[0;32m   3285\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39moutput_dir,\n\u001b[0;32m   3286\u001b[0m         clone_from\u001b[38;5;241m=\u001b[39mrepo_name,\n\u001b[0;32m   3287\u001b[0m         use_auth_token\u001b[38;5;241m=\u001b[39muse_auth_token,\n\u001b[0;32m   3288\u001b[0m         private\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_private_repo,\n\u001b[0;32m   3289\u001b[0m     )\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\utils\\hub.py:801\u001b[0m, in \u001b[0;36mget_full_repo_name\u001b[1;34m(model_id, organization, token)\u001b[0m\n\u001b[0;32m    799\u001b[0m     token \u001b[38;5;241m=\u001b[39m HfFolder\u001b[38;5;241m.\u001b[39mget_token()\n\u001b[0;32m    800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m organization \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 801\u001b[0m     username \u001b[38;5;241m=\u001b[39m \u001b[43mwhoami\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m    802\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00musername\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    803\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[0;32m    116\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\hf_api.py:925\u001b[0m, in \u001b[0;36mHfApi.whoami\u001b[1;34m(self, token)\u001b[0m\n\u001b[0;32m    913\u001b[0m \u001b[38;5;129m@validate_hf_hub_args\u001b[39m\n\u001b[0;32m    914\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwhoami\u001b[39m(\u001b[38;5;28mself\u001b[39m, token: Optional[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict:\n\u001b[0;32m    915\u001b[0m     \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    916\u001b[0m \u001b[38;5;124;03m    Call HF API to know \"whoami\".\u001b[39;00m\n\u001b[0;32m    917\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    921\u001b[0m \u001b[38;5;124;03m            not provided.\u001b[39;00m\n\u001b[0;32m    922\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m    923\u001b[0m     r \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mget(\n\u001b[0;32m    924\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/api/whoami-v2\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m--> 925\u001b[0m         headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_build_hf_headers\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    926\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;66;43;03m# If `token` is provided and not `None`, it will be used by default.\u001b[39;49;00m\n\u001b[0;32m    927\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;66;43;03m# Otherwise, the token must be retrieved from cache or env variable.\u001b[39;49;00m\n\u001b[0;32m    928\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    929\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m    930\u001b[0m     )\n\u001b[0;32m    931\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    932\u001b[0m         hf_raise_for_status(r)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\hf_api.py:4992\u001b[0m, in \u001b[0;36mHfApi._build_hf_headers\u001b[1;34m(self, token, is_write_action, library_name, library_version, user_agent)\u001b[0m\n\u001b[0;32m   4989\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m   4990\u001b[0m     \u001b[38;5;66;03m# Cannot do `token = token or self.token` as token can be `False`.\u001b[39;00m\n\u001b[0;32m   4991\u001b[0m     token \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoken\n\u001b[1;32m-> 4992\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbuild_hf_headers\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   4993\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4994\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_write_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_write_action\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4995\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlibrary_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4996\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlibrary_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4997\u001b[0m \u001b[43m    \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4998\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[0;32m    116\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_headers.py:121\u001b[0m, in \u001b[0;36mbuild_hf_headers\u001b[1;34m(token, is_write_action, library_name, library_version, user_agent)\u001b[0m\n\u001b[0;32m     48\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m     49\u001b[0m \u001b[38;5;124;03mBuild headers dictionary to send in a HF Hub call.\u001b[39;00m\n\u001b[0;32m     50\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    118\u001b[0m \u001b[38;5;124;03m        If `token=True` but token is not saved locally.\u001b[39;00m\n\u001b[0;32m    119\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    120\u001b[0m \u001b[38;5;66;03m# Get auth token to send\u001b[39;00m\n\u001b[1;32m--> 121\u001b[0m token_to_send \u001b[38;5;241m=\u001b[39m \u001b[43mget_token_to_send\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    122\u001b[0m _validate_token_to_send(token_to_send, is_write_action\u001b[38;5;241m=\u001b[39mis_write_action)\n\u001b[0;32m    124\u001b[0m \u001b[38;5;66;03m# Combine headers\u001b[39;00m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_headers.py:153\u001b[0m, in \u001b[0;36mget_token_to_send\u001b[1;34m(token)\u001b[0m\n\u001b[0;32m    151\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m    152\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m cached_token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m LocalTokenNotFoundError(\n\u001b[0;32m    154\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToken is required (`token=True`), but no token found. You\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    155\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m need to provide a token or be logged in to Hugging Face with\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    156\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `huggingface-cli login` or `huggingface_hub.login`. See\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    157\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m https://huggingface.co/settings/tokens.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    158\u001b[0m         )\n\u001b[0;32m    159\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m cached_token\n\u001b[0;32m    161\u001b[0m \u001b[38;5;66;03m# Case implicit use of the token is forbidden by env variable\u001b[39;00m\n",
      "\u001b[1;31mLocalTokenNotFoundError\u001b[0m: Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens."
     ]
    }
   ],
   "source": [
    "trainer.push_to_hub()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6fa45bc-3aa4-47d1-bf4c-7ae1b51f0798",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}