"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [3504/3655 4:12:37 < 1:15:59, 0.03 it/s, Epoch 0.96/1]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Epoch | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+ "│ in <module> │\n",
+ "│ │\n",
+ "│ ❱ 1 trainer.train(resume_from_checkpoint = True) │\n",
+ "│ 2 wandb.finish() │\n",
+ "│ 3 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1645 in train │\n",
+ "│ │\n",
+ "│ 1642 │ │ inner_training_loop = find_executable_batch_size( │\n",
+ "│ 1643 │ │ │ self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size │\n",
+ "│ 1644 │ │ ) │\n",
+ "│ ❱ 1645 │ │ return inner_training_loop( │\n",
+ "│ 1646 │ │ │ args=args, │\n",
+ "│ 1647 │ │ │ resume_from_checkpoint=resume_from_checkpoint, │\n",
+ "│ 1648 │ │ │ trial=trial, │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1938 in _inner_training_loop │\n",
+ "│ │\n",
+ "│ 1935 │ │ │ │ │ self.control = self.callback_handler.on_step_begin(args, self.state, │\n",
+ "│ 1936 │ │ │ │ │\n",
+ "│ 1937 │ │ │ │ with self.accelerator.accumulate(model): │\n",
+ "│ ❱ 1938 │ │ │ │ │ tr_loss_step = self.training_step(model, inputs) │\n",
+ "│ 1939 │ │ │ │ │\n",
+ "│ 1940 │ │ │ │ if ( │\n",
+ "│ 1941 │ │ │ │ │ args.logging_nan_inf_filter │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2770 in training_step │\n",
+ "│ │\n",
+ "│ 2767 │ │ │ with amp.scale_loss(loss, self.optimizer) as scaled_loss: │\n",
+ "│ 2768 │ │ │ │ scaled_loss.backward() │\n",
+ "│ 2769 │ │ else: │\n",
+ "│ ❱ 2770 │ │ │ self.accelerator.backward(loss) │\n",
+ "│ 2771 │ │ │\n",
+ "│ 2772 │ │ return loss.detach() / self.args.gradient_accumulation_steps │\n",
+ "│ 2773 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/accelerate/accelerator.py:1819 in backward │\n",
+ "│ │\n",
+ "│ 1816 │ │ elif self.distributed_type == DistributedType.MEGATRON_LM: │\n",
+ "│ 1817 │ │ │ return │\n",
+ "│ 1818 │ │ elif self.scaler is not None: │\n",
+ "│ ❱ 1819 │ │ │ self.scaler.scale(loss).backward(**kwargs) │\n",
+ "│ 1820 │ │ else: │\n",
+ "│ 1821 │ │ │ loss.backward(**kwargs) │\n",
+ "│ 1822 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/torch/_tensor.py:396 in backward │\n",
+ "│ │\n",
+ "│ 393 │ │ │ │ retain_graph=retain_graph, │\n",
+ "│ 394 │ │ │ │ create_graph=create_graph, │\n",
+ "│ 395 │ │ │ │ inputs=inputs) │\n",
+ "│ ❱ 396 │ │ torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=input │\n",
+ "│ 397 │ │\n",
+ "│ 398 │ def register_hook(self, hook): │\n",
+ "│ 399 │ │ r\"\"\"Registers a backward hook. │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/torch/autograd/__init__.py:173 in backward │\n",
+ "│ │\n",
+ "│ 170 │ # The reason we repeat same the comment below is that │\n",
+ "│ 171 │ # some Python versions print out the first line of a multi-line function │\n",
+ "│ 172 │ # calls in the traceback and some print out the last line │\n",
+ "│ ❱ 173 │ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the bac │\n",
+ "│ 174 │ │ tensors, grad_tensors_, retain_graph, create_graph, inputs, │\n",
+ "│ 175 │ │ allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to ru │\n",
+ "│ 176 │\n",
+ "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+ "KeyboardInterrupt\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
+ "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 trainer.train(resume_from_checkpoint = \u001b[94mTrue\u001b[0m) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0mwandb.finish() \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1645\u001b[0m in \u001b[92mtrain\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1642 \u001b[0m\u001b[2m│ │ \u001b[0minner_training_loop = find_executable_batch_size( \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1643 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1644 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1645 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop( \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1646 \u001b[0m\u001b[2m│ │ │ \u001b[0margs=args, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1647 \u001b[0m\u001b[2m│ │ │ \u001b[0mresume_from_checkpoint=resume_from_checkpoint, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1648 \u001b[0m\u001b[2m│ │ │ \u001b[0mtrial=trial, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1938\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1935 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[96mself\u001b[0m.control = \u001b[96mself\u001b[0m.callback_handler.on_step_begin(args, \u001b[96mself\u001b[0m.state, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1936 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1937 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.accelerator.accumulate(model): \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1938 \u001b[2m│ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1939 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1940 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mif\u001b[0m ( \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1941 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0margs.logging_nan_inf_filter \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2770\u001b[0m in \u001b[92mtraining_step\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2767 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m amp.scale_loss(loss, \u001b[96mself\u001b[0m.optimizer) \u001b[94mas\u001b[0m scaled_loss: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2768 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mscaled_loss.backward() \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2769 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2770 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.accelerator.backward(loss) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2771 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2772 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m loss.detach() / \u001b[96mself\u001b[0m.args.gradient_accumulation_steps \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2773 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/accelerate/\u001b[0m\u001b[1;33maccelerator.py\u001b[0m:\u001b[94m1819\u001b[0m in \u001b[92mbackward\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1816 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melif\u001b[0m \u001b[96mself\u001b[0m.distributed_type == DistributedType.MEGATRON_LM: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1817 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1818 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melif\u001b[0m \u001b[96mself\u001b[0m.scaler \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1819 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.scaler.scale(loss).backward(**kwargs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1820 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1821 \u001b[0m\u001b[2m│ │ │ \u001b[0mloss.backward(**kwargs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m1822 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/torch/\u001b[0m\u001b[1;33m_tensor.py\u001b[0m:\u001b[94m396\u001b[0m in \u001b[92mbackward\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 393 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mretain_graph=retain_graph, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 394 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mcreate_graph=create_graph, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 395 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minputs=inputs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 396 \u001b[2m│ │ \u001b[0mtorch.autograd.backward(\u001b[96mself\u001b[0m, gradient, retain_graph, create_graph, inputs=input \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 397 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 398 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mregister_hook\u001b[0m(\u001b[96mself\u001b[0m, hook): \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 399 \u001b[0m\u001b[2;90m│ │ \u001b[0m\u001b[33mr\u001b[0m\u001b[33m\"\"\"Registers a backward hook.\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/torch/autograd/\u001b[0m\u001b[1;33m__init__.py\u001b[0m:\u001b[94m173\u001b[0m in \u001b[92mbackward\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m170 \u001b[0m\u001b[2m│ \u001b[0m\u001b[2m# The reason we repeat same the comment below is that\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m171 \u001b[0m\u001b[2m│ \u001b[0m\u001b[2m# some Python versions print out the first line of a multi-line function\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m172 \u001b[0m\u001b[2m│ \u001b[0m\u001b[2m# calls in the traceback and some print out the last line\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m173 \u001b[2m│ \u001b[0mVariable._execution_engine.run_backward( \u001b[2m# Calls into the C++ engine to run the bac\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m174 \u001b[0m\u001b[2m│ │ \u001b[0mtensors, grad_tensors_, retain_graph, create_graph, inputs, \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m175 \u001b[0m\u001b[2m│ │ \u001b[0mallow_unreachable=\u001b[94mTrue\u001b[0m, accumulate_grad=\u001b[94mTrue\u001b[0m) \u001b[2m# Calls into the C++ engine to ru\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m176 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
+ "\u001b[1;91mKeyboardInterrupt\u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainer.train(resume_from_checkpoint = True)\n",
+ "wandb.finish()"
+ ]
+ },
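+ {
+ "cell_type": "markdown",
+ "id": "resume-from-checkpoint-note",
+ "metadata": {},
+ "source": [
+ "The run above was interrupted with a KeyboardInterrupt just before the end of the epoch. Below is a minimal sketch (not executed here) of locating the newest checkpoint before resuming; `output_dir` is a placeholder for the directory set in the TrainingArguments.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "resume-from-checkpoint-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: find the newest checkpoint-* directory saved by the Trainer so\n",
+ "# training can resume from it. `output_dir` is a placeholder; use the\n",
+ "# value from TrainingArguments.output_dir.\n",
+ "from transformers.trainer_utils import get_last_checkpoint\n",
+ "\n",
+ "output_dir = \"path_to_save\"  # hypothetical path\n",
+ "last_ckpt = get_last_checkpoint(output_dir)  # None if no checkpoint exists\n",
+ "print(last_ckpt)\n",
+ "# trainer.train(resume_from_checkpoint=last_ckpt)"
+ ]
+ },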
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "399d27d2-13b6-45e4-bf68-9ff30ad0ec1d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-07T08:43:09.377158Z",
+ "iopub.status.busy": "2023-07-07T08:43:09.376340Z",
+ "iopub.status.idle": "2023-07-07T08:43:09.380265Z",
+ "shell.execute_reply": "2023-07-07T08:43:09.379781Z",
+ "shell.execute_reply.started": "2023-07-07T08:43:09.377127Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Perplexity: 236.31\n"
+ ]
+ }
+ ],
+ "source": [
+ "import math\n",
+ "print(f\"Perplexity: {math.exp(5.465127):.2f}\")"
+ ]
+ },
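+ {
+ "cell_type": "markdown",
+ "id": "perplexity-from-logs-note",
+ "metadata": {},
+ "source": [
+ "Perplexity is the exponential of the cross-entropy loss; the 5.465127 above appears to have been copied by hand from the training logs. A sketch that reads the most recent logged `eval_loss` from the trainer state instead (assumes an evaluation has already been logged):\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "perplexity-from-logs-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "\n",
+ "# Take the latest eval_loss from the Trainer's log history rather than\n",
+ "# hard-coding the value.\n",
+ "losses = [log[\"eval_loss\"] for log in trainer.state.log_history if \"eval_loss\" in log]\n",
+ "if losses:\n",
+ "    print(f\"Perplexity: {math.exp(losses[-1]):.2f}\")"
+ ]
+ },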
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "bbf22bea-7026-42c9-a643-ba65ab8cdbff",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-09T12:16:26.337814Z",
+ "iopub.status.busy": "2023-07-09T12:16:26.337328Z",
+ "iopub.status.idle": "2023-07-09T12:47:19.149806Z",
+ "shell.execute_reply": "2023-07-09T12:47:19.149298Z",
+ "shell.execute_reply.started": "2023-07-09T12:16:26.337789Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [50731/50731 30:52]\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Perplexity: 78.40\n"
+ ]
+ }
+ ],
+ "source": [
+ "eval_results = trainer.evaluate()\n",
+ "print(f\"Perplexity: {math.exp(eval_results['eval_loss']):.2f}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "7fd81e73-7c9c-401e-8b8d-8e2a843bb7c7",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-09T12:53:37.509234Z",
+ "iopub.status.busy": "2023-07-09T12:53:37.508417Z",
+ "iopub.status.idle": "2023-07-09T12:53:38.733085Z",
+ "shell.execute_reply": "2023-07-09T12:53:38.732326Z",
+ "shell.execute_reply.started": "2023-07-09T12:53:37.509205Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+ "│ in <module> │\n",
+ "│ │\n",
+ "│ ❱ 1 trainer.push_to_hub() │\n",
+ "│ 2 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3716 in push_to_hub │\n",
+ "│ │\n",
+ "│ 3713 │ │ # If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, w │\n",
+ "│ 3714 │ │ # it might fail. │\n",
+ "│ 3715 │ │ if not hasattr(self, \"repo\"): │\n",
+ "│ ❱ 3716 │ │ │ self.init_git_repo() │\n",
+ "│ 3717 │ │ │\n",
+ "│ 3718 │ │ model_name = kwargs.pop(\"model_name\", None) │\n",
+ "│ 3719 │ │ if model_name is None and self.args.should_save: │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3571 in init_git_repo │\n",
+ "│ │\n",
+ "│ 3568 │ │ # Make sure the repo exists. │\n",
+ "│ 3569 │ │ create_repo(repo_name, token=self.args.hub_token, private=self.args.hub_private_ │\n",
+ "│ 3570 │ │ try: │\n",
+ "│ ❱ 3571 │ │ │ self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=sel │\n",
+ "│ 3572 │ │ except EnvironmentError: │\n",
+ "│ 3573 │ │ │ if self.args.overwrite_output_dir and at_init: │\n",
+ "│ 3574 │ │ │ │ # Try again after wiping output_dir │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_validators.py:118 in _inner_fn │\n",
+ "│ │\n",
+ "│ 115 │ │ if check_use_auth_token: │\n",
+ "│ 116 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=ha │\n",
+ "│ 117 │ │ │\n",
+ "│ ❱ 118 │ │ return fn(*args, **kwargs) │\n",
+ "│ 119 │ │\n",
+ "│ 120 │ return _inner_fn # type: ignore │\n",
+ "│ 121 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:516 in __init__ │\n",
+ "│ │\n",
+ "│ 513 │ │ │ self.huggingface_token = HfFolder.get_token() │\n",
+ "│ 514 │ │ │\n",
+ "│ 515 │ │ if clone_from is not None: │\n",
+ "│ ❱ 516 │ │ │ self.clone_from(repo_url=clone_from) │\n",
+ "│ 517 │ │ else: │\n",
+ "│ 518 │ │ │ if is_git_repo(self.local_dir): │\n",
+ "│ 519 │ │ │ │ logger.debug(\"[Repository] is a valid git repo\") │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_validators.py:118 in _inner_fn │\n",
+ "│ │\n",
+ "│ 115 │ │ if check_use_auth_token: │\n",
+ "│ 116 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=ha │\n",
+ "│ 117 │ │ │\n",
+ "│ ❱ 118 │ │ return fn(*args, **kwargs) │\n",
+ "│ 119 │ │\n",
+ "│ 120 │ return _inner_fn # type: ignore │\n",
+ "│ 121 │\n",
+ "│ │\n",
+ "│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:680 in clone_from │\n",
+ "│ │\n",
+ "│ 677 │ │ │ else: │\n",
+ "│ 678 │ │ │ │ # Check if the folder is the root of a git repository │\n",
+ "│ 679 │ │ │ │ if not is_git_repo(self.local_dir): │\n",
+ "│ ❱ 680 │ │ │ │ │ raise EnvironmentError( │\n",
+ "│ 681 │ │ │ │ │ │ \"Tried to clone a repository in a non-empty folder that isn't\" │\n",
+ "│ 682 │ │ │ │ │ │ f\" a git repository ('{self.local_dir}'). If you really want to\" │\n",
+ "│ 683 │ │ │ │ │ │ f\" do this, do it manually:\\n cd {self.local_dir} && git init\" │\n",
+ "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+ "OSError: Tried to clone a repository in a non-empty folder that isn't a git repository \n",
+ "('/notebooks/9wimu9/sinhala-bert-1'). If you really want to do this, do it manually:\n",
+ " cd /notebooks/9wimu9/sinhala-bert-1 && git init && git remote add origin && git pull origin main\n",
+ " or clone repo to a new folder and move your existing files there afterwards.\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
+ "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 trainer.push_to_hub() \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3716\u001b[0m in \u001b[92mpush_to_hub\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3713 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, w\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3714 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# it might fail.\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3715 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mhasattr\u001b[0m(\u001b[96mself\u001b[0m, \u001b[33m\"\u001b[0m\u001b[33mrepo\u001b[0m\u001b[33m\"\u001b[0m): \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3716 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.init_git_repo() \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3717 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3718 \u001b[0m\u001b[2m│ │ \u001b[0mmodel_name = kwargs.pop(\u001b[33m\"\u001b[0m\u001b[33mmodel_name\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94mNone\u001b[0m) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3719 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m model_name \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.args.should_save: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3571\u001b[0m in \u001b[92minit_git_repo\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3568 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Make sure the repo exists.\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3569 \u001b[0m\u001b[2m│ │ \u001b[0mcreate_repo(repo_name, token=\u001b[96mself\u001b[0m.args.hub_token, private=\u001b[96mself\u001b[0m.args.hub_private_ \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3570 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3571 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.repo = Repository(\u001b[96mself\u001b[0m.args.output_dir, clone_from=repo_name, token=\u001b[96msel\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3572 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m \u001b[96mEnvironmentError\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3573 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.overwrite_output_dir \u001b[95mand\u001b[0m at_init: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m3574 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[2m# Try again after wiping output_dir\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/\u001b[0m\u001b[1;33m_validators.py\u001b[0m:\u001b[94m118\u001b[0m in \u001b[92m_inner_fn\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m115 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m check_use_auth_token: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m116 \u001b[0m\u001b[2m│ │ │ \u001b[0mkwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[91m__name__\u001b[0m, has_token=ha \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m117 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m118 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m fn(*args, **kwargs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m119 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m120 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m _inner_fn \u001b[2m# type: ignore\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m121 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m516\u001b[0m in \u001b[92m__init__\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 513 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.huggingface_token = HfFolder.get_token() \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 514 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 515 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m clone_from \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 516 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.clone_from(repo_url=clone_from) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 517 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 518 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m is_git_repo(\u001b[96mself\u001b[0m.local_dir): \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 519 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mlogger.debug(\u001b[33m\"\u001b[0m\u001b[33m[Repository] is a valid git repo\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/\u001b[0m\u001b[1;33m_validators.py\u001b[0m:\u001b[94m118\u001b[0m in \u001b[92m_inner_fn\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m115 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m check_use_auth_token: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m116 \u001b[0m\u001b[2m│ │ │ \u001b[0mkwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[91m__name__\u001b[0m, has_token=ha \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m117 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m118 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m fn(*args, **kwargs) \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m119 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m120 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m _inner_fn \u001b[2m# type: ignore\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m121 \u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m680\u001b[0m in \u001b[92mclone_from\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 677 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 678 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[2m# Check if the folder is the root of a git repository\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 679 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m is_git_repo(\u001b[96mself\u001b[0m.local_dir): \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 680 \u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mEnvironmentError\u001b[0m( \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 681 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[33m\"\u001b[0m\u001b[33mTried to clone a repository in a non-empty folder that isn\u001b[0m\u001b[33m'\u001b[0m\u001b[33mt\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 682 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m a git repository (\u001b[0m\u001b[33m'\u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.local_dir\u001b[33m}\u001b[0m\u001b[33m'\u001b[0m\u001b[33m). If you really want to\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m│\u001b[0m \u001b[2m 683 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m do this, do it manually:\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m cd \u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.local_dir\u001b[33m}\u001b[0m\u001b[33m && git init\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n",
+ "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
+ "\u001b[1;91mOSError: \u001b[0mTried to clone a repository in a non-empty folder that isn't a git repository \n",
+ "\u001b[1m(\u001b[0m\u001b[32m'/notebooks/9wimu9/sinhala-bert-1'\u001b[0m\u001b[1m)\u001b[0m. If you really want to do this, do it manually:\n",
+ " cd \u001b[35m/notebooks/9wimu9/\u001b[0m\u001b[95msinhala-bert-1\u001b[0m && git init && git remote add origin && git pull origin main\n",
+ " or clone repo to a new folder and move your existing files there afterwards.\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainer.push_to_hub()"
+ ]
+ },
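+ {
+ "cell_type": "markdown",
+ "id": "push-to-hub-workaround-note",
+ "metadata": {},
+ "source": [
+ "`trainer.push_to_hub()` failed because the output directory is non-empty but not a git repository, so the clone step aborts (the OSError above even prints the manual `git init` fix). One possible workaround, sketched below, is to skip the git clone entirely and upload the saved files over the HTTP API with `HfApi.upload_folder`; the repo id is the one used later in this notebook.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "push-to-hub-workaround-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: upload the saved model files directly instead of git-cloning\n",
+ "# into the non-empty output directory.\n",
+ "from huggingface_hub import HfApi\n",
+ "\n",
+ "api = HfApi()\n",
+ "api.upload_folder(\n",
+ "    folder_path=\"/notebooks/path_to_save\",  # directory written by trainer.save_model\n",
+ "    repo_id=\"9wimu9/sinhala-bert-1.1\",\n",
+ "    repo_type=\"model\",\n",
+ ")"
+ ]
+ },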
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "d3417a50-f0a7-4cd7-bc3b-14106660be58",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-09T12:55:51.765511Z",
+ "iopub.status.busy": "2023-07-09T12:55:51.764785Z",
+ "iopub.status.idle": "2023-07-09T12:55:53.074194Z",
+ "shell.execute_reply": "2023-07-09T12:55:53.073512Z",
+ "shell.execute_reply.started": "2023-07-09T12:55:51.765481Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "trainer.save_model(\"path_to_save\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "6a3b42de-552c-41fc-a454-afe8a0bf567d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-09T12:58:09.073605Z",
+ "iopub.status.busy": "2023-07-09T12:58:09.073328Z",
+ "iopub.status.idle": "2023-07-09T12:58:13.289346Z",
+ "shell.execute_reply": "2023-07-09T12:58:13.288684Z",
+ "shell.execute_reply.started": "2023-07-09T12:58:09.073583Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of the model checkpoint at /notebooks/path_to_save were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']\n",
+ "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of RobertaModel were not initialized from the model checkpoint at /notebooks/path_to_save and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import AutoModel \n",
+ "model = AutoModel.from_pretrained('/notebooks/path_to_save',local_files_only=True)\n"
+ ]
+ },
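+ {
+ "cell_type": "markdown",
+ "id": "masked-lm-head-note",
+ "metadata": {},
+ "source": [
+ "The warnings above are expected: `AutoModel` drops the pretrained `lm_head` and adds a freshly initialized pooler. To keep the masked-LM head (e.g. for fill-mask inference), the checkpoint can be loaded with the task-specific class instead, as in this sketch:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "masked-lm-head-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: load with the masked-LM class so the pretrained lm_head weights\n",
+ "# are kept instead of being discarded.\n",
+ "from transformers import AutoModelForMaskedLM\n",
+ "\n",
+ "mlm_model = AutoModelForMaskedLM.from_pretrained('/notebooks/path_to_save', local_files_only=True)"
+ ]
+ },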
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "b6f2c49a-9a09-4949-b67f-29df6d0aa895",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-07-09T13:02:12.261157Z",
+ "iopub.status.busy": "2023-07-09T13:02:12.260199Z",
+ "iopub.status.idle": "2023-07-09T13:03:41.483513Z",
+ "shell.execute_reply": "2023-07-09T13:03:41.483062Z",
+ "shell.execute_reply.started": "2023-07-09T13:02:12.261124Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "19ea017fbdf04010b52469760f205626",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/1.27G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/9wimu9/sinhala-bert-1.1/commit/97e53a1d1bfd88984d639b643028a19ae2700b75', commit_message='Upload model', commit_description='', oid='97e53a1d1bfd88984d639b643028a19ae2700b75', pr_url=None, pr_revision=None, pr_num=None)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.push_to_hub('9wimu9/sinhala-bert-1.1')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4553ec7-1e38-4b44-8c5f-e46786cd3cfc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import HfApi\n",
+ "api = HfApi()\n",
+ "files = ['tokenizer.json','training_args.bin']\n",
+ "for file in files:\n",
+ " api.upload_file(\n",
+ " path_or_fileobj=\"/notebooks/path_to_save/\"+file,\n",
+ " path_in_repo=file,\n",
+ " repo_id=\"9wimu9/sinhala-bert-1.1\",\n",
+ " repo_type=\"model\",\n",
+ " )"
+ ]
+ },
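+ {
+ "cell_type": "markdown",
+ "id": "tokenizer-push-note",
+ "metadata": {},
+ "source": [
+ "An alternative to uploading `tokenizer.json` file-by-file is to push the tokenizer object itself; a one-line sketch, assuming `tokenizer` is the RobertaTokenizerFast used during training:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "tokenizer-push-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: push all tokenizer files (tokenizer.json, special tokens map, ...)\n",
+ "# to the same repo in one call.\n",
+ "tokenizer.push_to_hub(\"9wimu9/sinhala-bert-1.1\")"
+ ]
+ },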
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d1614503-df5d-454f-a81d-d96bb1899443",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "learning rate scheduler details can be find here\n",
+ "https://dev.classmethod.jp/articles/huggingface-usage-scheluder-type/"
+ ]
+ },
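+ {
+ "cell_type": "markdown",
+ "id": "scheduler-config-note",
+ "metadata": {},
+ "source": [
+ "For reference, the schedule is selected with `lr_scheduler_type` in `TrainingArguments`; a minimal sketch with placeholder values:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "scheduler-config-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: choose the LR schedule by name, e.g. \"linear\" (the default),\n",
+ "# \"cosine\", or \"constant_with_warmup\".\n",
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "args = TrainingArguments(\n",
+ "    output_dir=\"path_to_save\",  # placeholder\n",
+ "    lr_scheduler_type=\"linear\",\n",
+ "    warmup_steps=10_000,\n",
+ "    learning_rate=1e-4,\n",
+ ")"
+ ]
+ },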
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd184295-1c0b-4625-a516-da417beb814f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bert hyper params\n",
+ "======================\n",
+ "β1 = 0.9,\n",
+ "β2 = 0.999, \n",
+ "ǫ = 1e-6\n",
+ "L2 weight decay = 0.01\n",
+ "learning rate = warmed up first 10k to a peak of 1e-4 then linearly decayed\n",
+ "drop out 0.1\n",
+ "batch size = 256\n",
+ "step size = 1m\n",
+ "max_token_length = 512\n",
+ "\n",
+ "roberta\n",
+ "============\n",
+ "β2 = 0.98 for lareg batch sizs\n",
+ "max_token_length = 512\n",
+ "batch size = 2k\n",
+ "lr = 7e-4\n",
+ "\n"
+ ]
+ }
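+ {
+ "cell_type": "markdown",
+ "id": "bert-args-mapping-note",
+ "metadata": {},
+ "source": [
+ "A sketch of how the BERT numbers above could map onto `TrainingArguments` (assumptions: single device, batch size 256 reached via gradient accumulation; all values are placeholders to adapt):\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bert-args-mapping-sketch",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: BERT-style optimizer settings expressed as TrainingArguments.\n",
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "bert_like_args = TrainingArguments(\n",
+ "    output_dir=\"path_to_save\",       # placeholder\n",
+ "    adam_beta1=0.9,\n",
+ "    adam_beta2=0.999,\n",
+ "    adam_epsilon=1e-6,\n",
+ "    weight_decay=0.01,\n",
+ "    learning_rate=1e-4,              # peak LR after warmup\n",
+ "    warmup_steps=10_000,\n",
+ "    lr_scheduler_type=\"linear\",\n",
+ "    max_steps=1_000_000,\n",
+ "    per_device_train_batch_size=32,  # x 8 accumulation steps = 256\n",
+ "    gradient_accumulation_steps=8,\n",
+ ")"
+ ]
+ }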
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}