{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "426ead53-211f-4f07-b327-ce1f75f923e0", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting transformers\n", " Downloading transformers-4.41.2-py3-none-any.whl (9.1 MB)\n", "\u001b[K |████████████████████████████████| 9.1 MB 26.1 MB/s eta 0:00:01\n", "\u001b[?25hCollecting sentencepiece\n", " Downloading sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[K |████████████████████████████████| 1.3 MB 94.2 MB/s eta 0:00:01\n", "\u001b[?25hCollecting filelock\n", " Downloading filelock-3.15.4-py3-none-any.whl (16 kB)\n", "Collecting numpy>=1.17\n", " Downloading numpy-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.3 MB)\n", "\u001b[K |████████████████████████████████| 19.3 MB 97.7 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (6.0.1)\n", "Collecting safetensors>=0.4.1\n", " Downloading safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", "\u001b[K |████████████████████████████████| 1.2 MB 87.6 MB/s eta 0:00:01\n", "\u001b[?25hCollecting tokenizers<0.20,>=0.19\n", " Downloading tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", "\u001b[K |████████████████████████████████| 3.6 MB 111.0 MB/s eta 0:00:01\n", "\u001b[?25hCollecting regex!=2019.12.17\n", " Downloading regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (774 kB)\n", "\u001b[K |████████████████████████████████| 774 kB 96.6 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (24.1)\n", "Collecting huggingface-hub<1.0,>=0.23.0\n", " Downloading huggingface_hub-0.23.4-py3-none-any.whl (402 kB)\n", "\u001b[K |████████████████████████████████| 
402 kB 118.3 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (4.61.2)\n", "Requirement already satisfied: requests in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/user/miniconda/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (4.12.2)\n", "Collecting fsspec>=2023.5.0\n", " Downloading fsspec-2024.6.0-py3-none-any.whl (176 kB)\n", "\u001b[K |████████████████████████████████| 176 kB 127.1 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: urllib3<3,>=1.21.1 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (1.26.6)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (2021.5.30)\n", "Installing collected packages: fsspec, filelock, huggingface-hub, tokenizers, safetensors, regex, numpy, transformers, sentencepiece\n", "Successfully installed filelock-3.15.4 fsspec-2024.6.0 huggingface-hub-0.23.4 numpy-2.0.0 regex-2024.5.15 safetensors-0.4.3 sentencepiece-0.2.0 tokenizers-0.19.1 transformers-4.41.2\n" ] } ], "source": [ "# Install the tokenizer dependencies into the kernel's own environment.\n", "# %pip (rather than !pip) targets the running kernel; the versions are pinned\n", "# to the ones this cell's saved install log reports, so a re-run is reproducible.\n", "%pip install transformers==4.41.2 sentencepiece==0.2.0" ] }, { "cell_type": "code", "execution_count": 1, "id": "5bee7472-9458-428b-b315-2a6579ecbcd7", "metadata": { "tags": [] }, "outputs": [ { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.0051538944244384766, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "config.json", "rate": null, "total": 
615, "unit": "B", "unit_divisor": 1000, "unit_scale": true }, "application/vnd.jupyter.widget-view+json": { "model_id": "b071a2288c7848b9a86e9c12e7c5105c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/615 [00:00 3\u001b[0m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnbroad/xlm-roberta-large-2048\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mAttributeError\u001b[0m: 'CommitInfo' object has no attribute 'push_to_hub'" ] } ], "source": [ "# Load the stock tokenizer and keep a reference to it. push_to_hub() returns a\n", "# CommitInfo rather than the tokenizer, so chaining it onto from_pretrained()\n", "# would leave `tokenizer` bound to a CommitInfo and make the final\n", "# push_to_hub() call raise AttributeError.\n", "tokenizer = AutoTokenizer.from_pretrained(\"xlm-roberta-large\")\n", "# Set the tokenizer's maximum length to 2048 before publishing, so the value is\n", "# set on the object that gets pushed.\n", "tokenizer.model_max_length = 2048\n", "# Single push of the modified tokenizer to the target repository.\n", "tokenizer.push_to_hub(\"nbroad/xlm-roberta-large-2048\")" ] }, { "cell_type": "code", "execution_count": null, "id": "f64c2a48-0850-4520-a7bc-f8870f570066", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 5 }