{"cells":[{"cell_type":"markdown","metadata":{"id":"BGKCW074dTy2"},"source":["# Harvard USPTO Dataset Training"]},{"cell_type":"markdown","metadata":{"id":"6IttmojFdTy4"},"source":["## Preprocessing USPTO Data"]},{"cell_type":"markdown","source":["### Importing the Dataset\n","\n","We first need to import the actual USPTO dataset."],"metadata":{"id":"rJ6oNXYiOtC3"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1UCFBK0OdTy5","executionInfo":{"status":"ok","timestamp":1682021338971,"user_tz":240,"elapsed":13759,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"87e553e1-6593-4b2d-e578-2a4e4e742d9b"},"outputs":[{"output_type":"stream","name":"stdout","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting datasets\n"," Downloading datasets-2.11.0-py3-none-any.whl (468 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.7/468.7 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from datasets) (1.5.3)\n","Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.9/dist-packages (from datasets) (2023.4.0)\n","Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (2.27.1)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from datasets) (1.22.4)\n","Collecting multiprocess\n"," Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.9/132.9 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting responses<0.19\n"," Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n","Collecting xxhash\n"," Downloading 
xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.2/212.2 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from datasets) (23.1)\n","Collecting huggingface-hub<1.0.0,>=0.11.0\n"," Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.1/200.1 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting dill<0.3.7,>=0.3.0\n"," Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.9/dist-packages (from datasets) (4.65.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from datasets) (6.0)\n","Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (9.0.0)\n","Collecting aiohttp\n"," Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m48.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (2.0.12)\n","Collecting async-timeout<5.0,>=4.0.0a3\n"," Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (23.1.0)\n","Collecting frozenlist>=1.1.1\n"," Downloading 
frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting aiosignal>=1.1.2\n"," Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n","Collecting yarl<2.0,>=1.0\n"," Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting multidict<7.0,>=4.5\n"," Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (4.5.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (3.11.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets) (3.4)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets) (1.26.15)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets) (2022.7.1)\n","Requirement 
already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n","Installing collected packages: xxhash, multidict, frozenlist, dill, async-timeout, yarl, responses, multiprocess, huggingface-hub, aiosignal, aiohttp, datasets\n","Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.11.0 dill-0.3.6 frozenlist-1.3.3 huggingface-hub-0.13.4 multidict-6.0.4 multiprocess-0.70.14 responses-0.18.0 xxhash-3.2.0 yarl-1.8.2\n"]}],"source":["!pip install datasets"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"V20AfVn8dTy6"},"outputs":[],"source":["from datasets import load_dataset\n","import pandas as pd\n","import numpy as np\n","import os\n","import json\n","import torch\n","import sys"]},{"cell_type":"markdown","metadata":{"id":"DALhUYBydTy7"},"source":["### Loading the Dataset\n","\n","We need to extract the dataset. We filter only for those in January 2016."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":474,"referenced_widgets":["9f321834b0bc4bc1ac089f9813fc0fb1","12e47c63e2fb4596b645c252e9756899","043c0260b1e14a129feab90cff5ef099","ddba4a3380794815a732bd1a453c925c","8d6813d2f8ec401d85e599eadc8dc093","4dca0683fdc5459e88f6687f3196af7f","0dba55c366e44537adf4048c24391786","6695e22c05d548139e71029524c0bc68","2a7b1a0f1e94446bb343979d86264f0c","155a8f30bf6f4bbcb491dda1ad722c3b","aa0b1c2f45104fc5b59e1704bce27e5f","06a520a78f5d4b95982a476b29734cbf","c5be3bfd001346af8d33caf0a64efc7f","de1f71b45aba42e4a807100c4a8bf81f","6eee3b7c1ce9453e8c73f568537d5ac8","8f895536b733460d9ad2987333e3733b","e05d3a3d645948a3b2745cac3d5aa737","da25e097845440948f1d3ec1096f2a99","5cfa97b42072490f8ab4d5f60e0a1792","22f78b57f5604f928475f9a81d723baf","87b97444ca194dd18f1a5bd6e4082fb1","de9855ff33fb41b58ef21fbb26f81b85","9a74f42973b34203af1100702e07c28f","08d60a39b36942c184664157f738c5f0","fc886230dc454a72a3b2954f9818e9e8","482f58d51
4734fd2aa1d82693c7d5c34","f52a0089a4494e719c54289ff33a1c04","c2f38dc161184b16ba930c2362bc8e3b","e0f8f41cd81647908c645f76483148e5","2326d9dbc48b40769dcde81195324f40","0887166927dd4805ae3346c3158be0d2","04f4e59cfdfe492c9644dd99f46910e9","5cfce12745f44e5a9c94b72120170915","d7bfd7f1e624447a9c1eea68116d915b","6a337183787b43ec8634dc0c5b95bd72","7dd3444c8a754737b4ed1c8f64f68601","d9c18d34c6b249b78fcbace6d79cbdec","52879a2ff4864ba084a15fe02ab8b1fa","a90a02a84860461789ad04c386cdf44e","8cc2a2fe26534066a3a778704cc5984b","754c5121d01646c4aa4284df0eb9bb4f","b957825a21c2412a9b0101869d1f44d8","9f145dfa3f6347a7ab46aaa5fb294ea1","85d3c680e7424ab99c5153f315ac51a0","18d0fb273b444854a97ddb941859e0bc","fedd62c855344bbc931485f1bed1bb20","3cd64accf6104e5f8a3ad59381650a61","d39610354db4493f9f33405267b50179","c82c7f9596a746b6a91435ddfe2801f5","453092a5a76540f2b7561c3f4f84f8b2","ba82d11406a2498ba8948b5729fb4935","50863c3f09e343a9970b73dc70abba90","269546507c224a049088863d503401b1","0cd095c293104c67996f52685a29b7e9","f4c9a13af4cd478bacedd30ee2d81b8d","f9ef1b3c4aa54426af822c5f8420f2ca","28f10c7180d24373ad411015ef51d68c","59139bbd088c4804a9c8213afc3ddf21","a6c68f2b636b4b10ab7f846a789b00aa","417d14969aa141c885ae6ddd6b554324","345e90b85bb542168925cc014a1780ff","0c2447ec9c2346feb9d9d34ad6f5ec89","2c3ac49e5ace478ea09109090519aa65","984011a4cf53494baca77e2847c1a6ec","0ad51ba66c3a48a2bf44a58c63d6f6b8","f71dc5f4ba95461eafff245393c29efb","422a18bc728147cfab41a3d434784b87","8e84ffdf79144dee93ec82a857d8abf8","a6de06c9f8c9494abd2c5146a151abf4","b707d410fc0b46be82513a3156c864ec","7cdeb5f8b21f42549d6919f0a140697a","8fe2841c26c947c28479cd459ae5edff","d40cdc8dac2b40429bd92e6330916ed5","f8c2274beb1d47e1a2e8d76d3f2babe1","a0ade24bceb54a7daedc48858588590a","895d67eeac0f46edb926a8bdd33f419b","21d8d59ca7304455aae43a23c7cbbbe8"]},"id":"d-bfQ8MsdTy8","executionInfo":{"status":"ok","timestamp":1682021405537,"user_tz":240,"elapsed":56565,"user":{"displayName":"Ryan 
Kim","userId":"18356277368138721144"}},"outputId":"734dcb4b-d924-479d-909c-ef907c2284c8"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading builder script: 0%| | 0.00/14.7k [00:00\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberdecisionabstractclaims
013261748ACCEPTEDThe present invention relates to passive optic...1. A compact optical network terminal, compris...
113995128ACCEPTEDEmbodiments of the invention provide a method ...1. A method comprising: using a first reader t...
314348792ACCEPTEDA crystal growth furnace comprising a crucible...1. A crystal growth furnace for growing a crys...
414360978REJECTEDA shoe midsole is composed of a base plate (1)...1. A sole member of footwear comprising a base...
514369795ACCEPTEDA ratchet tool includes a shaft member, a hand...1. A ratchet tool, comprising a shaft member, ...
...............
1614415002390ACCEPTEDA wavelength tunable laser device, including: ...1. A wavelength tunable laser device, comprisi...
1614515002391ACCEPTEDIn one aspect, a method for use in preparing a...1. (canceled) 2. The method of claim 19, where...
1614815002394ACCEPTEDA robot hand controlling method executes calcu...1. A controlling method of a robot hand, the r...
1614915002396REJECTEDA fusion protein is disclosed. The fusion prot...1. A fusion protein comprising an Fc fragment ...
1615015330955REJECTEDA pipe extraction tool that grips the inside o...1. A pipe extraction tool for extracting a pip...
\n","

8719 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n"," \n"," "]},"metadata":{},"execution_count":8}],"source":["trainDF"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"gTW_PbL5dTzD"},"outputs":[],"source":["valFeaturesToDrop = [col for col in list(df_val.columns) if col not in necessary_columns]\n","valDF = df_val.dropna()\n","valDF.drop(columns=valFeaturesToDrop, inplace=True)\n","valDF = valDF[valDF['decision'].isin(output_values)]"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":659},"id":"RHVeHC5QdTzD","executionInfo":{"status":"ok","timestamp":1682021425508,"user_tz":240,"elapsed":5,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"f2341ef5-fb24-4f24-e0c7-0e6cd251233e"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" patent_number decision \\\n","0 13144833 REJECTED \n","1 14006524 ACCEPTED \n","2 14365653 REJECTED \n","4 14396367 REJECTED \n","9 14416282 ACCEPTED \n","... ... ... \n","9085 15011551 REJECTED \n","9090 15011556 REJECTED \n","9091 15011557 ACCEPTED \n","9092 15011558 ACCEPTED \n","9093 15011559 ACCEPTED \n","\n"," abstract \\\n","0 Regimen for the treatment of rosacea include t... \n","1 A clamp arrangement includes a pair of bracket... \n","2 A system and method for device action and conf... \n","4 Systems and methods for managing datasets prod... \n","9 A scan driving circuit is provided. The scan d... \n","... ... \n","9085 The non-rigid gate device as described may be ... \n","9090 The present invention provides an improved unc... \n","9091 A method for detecting a software-race conditi... \n","9092 The present application relates to multi-stage... \n","9093 A paper feeder includes a housing, a driving u... \n","\n"," claims \n","0 1. A treatment regimen comprising: cleansing a... \n","1 1. A clamp arrangement for supporting a fractu... \n","2 1-20. (canceled) 21. A mobile device comprisin... \n","4 1. A method, comprising: executing, by one or ... \n","9 1. 
A scan driving circuit for driving a scan l... \n","... ... \n","9085 1; A non-rigid blocking apparatus referred to ... \n","9090 1. A method for rendering a plastic surface am... \n","9091 1. A method for detecting a software-race cond... \n","9092 1. A multi-stage amplitude modulation-based me... \n","9093 1. A paper feeder, comprising: a housing; a dr... \n","\n","[4888 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberdecisionabstractclaims
013144833REJECTEDRegimen for the treatment of rosacea include t...1. A treatment regimen comprising: cleansing a...
114006524ACCEPTEDA clamp arrangement includes a pair of bracket...1. A clamp arrangement for supporting a fractu...
214365653REJECTEDA system and method for device action and conf...1-20. (canceled) 21. A mobile device comprisin...
414396367REJECTEDSystems and methods for managing datasets prod...1. A method, comprising: executing, by one or ...
914416282ACCEPTEDA scan driving circuit is provided. The scan d...1. A scan driving circuit for driving a scan l...
...............
908515011551REJECTEDThe non-rigid gate device as described may be ...1; A non-rigid blocking apparatus referred to ...
909015011556REJECTEDThe present invention provides an improved unc...1. A method for rendering a plastic surface am...
909115011557ACCEPTEDA method for detecting a software-race conditi...1. A method for detecting a software-race cond...
909215011558ACCEPTEDThe present application relates to multi-stage...1. A multi-stage amplitude modulation-based me...
909315011559ACCEPTEDA paper feeder includes a housing, a driving u...1. A paper feeder, comprising: a housing; a dr...
\n","

4888 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":10}],"source":["valDF"]},{"cell_type":"markdown","metadata":{"id":"YFOqWvPUdTzD"},"source":["We need to replace the values in the `decision` column to numerical representations. We will set \"ACCEPTED\" as `1` and \"REJECTED\" as `0`."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CBxfqBL0dTzD"},"outputs":[],"source":["yKey = {\"ACCEPTED\":1,\"REJECTED\":0}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"82I6gVrtdTzE"},"outputs":[],"source":["trainDF2 = trainDF.replace({\"decision\": yKey})\n","valDF2 = valDF.replace({\"decision\": yKey})"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":746},"id":"1XVwvlGKdTzE","executionInfo":{"status":"ok","timestamp":1682021428511,"user_tz":240,"elapsed":5,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"bb49c208-ee63-4a2c-86b1-6bea0449b583"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" patent_number decision \\\n","0 13261748 1 \n","1 13995128 1 \n","3 14348792 1 \n","4 14360978 0 \n","5 14369795 1 \n","... ... ... \n","16144 15002390 1 \n","16145 15002391 1 \n","16148 15002394 1 \n","16149 15002396 0 \n","16150 15330955 0 \n","\n"," abstract \\\n","0 The present invention relates to passive optic... \n","1 Embodiments of the invention provide a method ... \n","3 A crystal growth furnace comprising a crucible... \n","4 A shoe midsole is composed of a base plate (1)... \n","5 A ratchet tool includes a shaft member, a hand... \n","... ... \n","16144 A wavelength tunable laser device, including: ... \n","16145 In one aspect, a method for use in preparing a... \n","16148 A robot hand controlling method executes calcu... \n","16149 A fusion protein is disclosed. The fusion prot... \n","16150 A pipe extraction tool that grips the inside o... \n","\n"," claims \n","0 1. A compact optical network terminal, compris... \n","1 1. 
A method comprising: using a first reader t... \n","3 1. A crystal growth furnace for growing a crys... \n","4 1. A sole member of footwear comprising a base... \n","5 1. A ratchet tool, comprising a shaft member, ... \n","... ... \n","16144 1. A wavelength tunable laser device, comprisi... \n","16145 1. (canceled) 2. The method of claim 19, where... \n","16148 1. A controlling method of a robot hand, the r... \n","16149 1. A fusion protein comprising an Fc fragment ... \n","16150 1. A pipe extraction tool for extracting a pip... \n","\n","[8719 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberdecisionabstractclaims
0132617481The present invention relates to passive optic...1. A compact optical network terminal, compris...
1139951281Embodiments of the invention provide a method ...1. A method comprising: using a first reader t...
3143487921A crystal growth furnace comprising a crucible...1. A crystal growth furnace for growing a crys...
4143609780A shoe midsole is composed of a base plate (1)...1. A sole member of footwear comprising a base...
5143697951A ratchet tool includes a shaft member, a hand...1. A ratchet tool, comprising a shaft member, ...
...............
16144150023901A wavelength tunable laser device, including: ...1. A wavelength tunable laser device, comprisi...
16145150023911In one aspect, a method for use in preparing a...1. (canceled) 2. The method of claim 19, where...
16148150023941A robot hand controlling method executes calcu...1. A controlling method of a robot hand, the r...
16149150023960A fusion protein is disclosed. The fusion prot...1. A fusion protein comprising an Fc fragment ...
16150153309550A pipe extraction tool that grips the inside o...1. A pipe extraction tool for extracting a pip...
\n","

8719 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":13}],"source":["trainDF2"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":659},"id":"D0eRJb_2dTzE","executionInfo":{"status":"ok","timestamp":1682021428713,"user_tz":240,"elapsed":4,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"74150843-f3b4-459c-ef34-d9cf08f0c0d6"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" patent_number decision \\\n","0 13144833 0 \n","1 14006524 1 \n","2 14365653 0 \n","4 14396367 0 \n","9 14416282 1 \n","... ... ... \n","9085 15011551 0 \n","9090 15011556 0 \n","9091 15011557 1 \n","9092 15011558 1 \n","9093 15011559 1 \n","\n"," abstract \\\n","0 Regimen for the treatment of rosacea include t... \n","1 A clamp arrangement includes a pair of bracket... \n","2 A system and method for device action and conf... \n","4 Systems and methods for managing datasets prod... \n","9 A scan driving circuit is provided. The scan d... \n","... ... \n","9085 The non-rigid gate device as described may be ... \n","9090 The present invention provides an improved unc... \n","9091 A method for detecting a software-race conditi... \n","9092 The present application relates to multi-stage... \n","9093 A paper feeder includes a housing, a driving u... \n","\n"," claims \n","0 1. A treatment regimen comprising: cleansing a... \n","1 1. A clamp arrangement for supporting a fractu... \n","2 1-20. (canceled) 21. A mobile device comprisin... \n","4 1. A method, comprising: executing, by one or ... \n","9 1. A scan driving circuit for driving a scan l... \n","... ... \n","9085 1; A non-rigid blocking apparatus referred to ... \n","9090 1. A method for rendering a plastic surface am... \n","9091 1. A method for detecting a software-race cond... \n","9092 1. A multi-stage amplitude modulation-based me... \n","9093 1. A paper feeder, comprising: a housing; a dr... 
\n","\n","[4888 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberdecisionabstractclaims
0131448330Regimen for the treatment of rosacea include t...1. A treatment regimen comprising: cleansing a...
1140065241A clamp arrangement includes a pair of bracket...1. A clamp arrangement for supporting a fractu...
2143656530A system and method for device action and conf...1-20. (canceled) 21. A mobile device comprisin...
4143963670Systems and methods for managing datasets prod...1. A method, comprising: executing, by one or ...
9144162821A scan driving circuit is provided. The scan d...1. A scan driving circuit for driving a scan l...
...............
9085150115510The non-rigid gate device as described may be ...1; A non-rigid blocking apparatus referred to ...
9090150115560The present invention provides an improved unc...1. A method for rendering a plastic surface am...
9091150115571A method for detecting a software-race conditi...1. A method for detecting a software-race cond...
9092150115581The present application relates to multi-stage...1. A multi-stage amplitude modulation-based me...
9093150115591A paper feeder includes a housing, a driving u...1. A paper feeder, comprising: a housing; a dr...
\n","

4888 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":14}],"source":["valDF2"]},{"cell_type":"markdown","metadata":{"id":"gRb0ApxTdTzF"},"source":["We re-label the `decision` column to `label`."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":676},"id":"vRFoJ8hidTzF","executionInfo":{"status":"ok","timestamp":1682021435301,"user_tz":240,"elapsed":500,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"9444d1d4-5450-4767-aeb0-c1709889b7ac"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" patent_number label abstract \\\n","0 13261748 1 The present invention relates to passive optic... \n","1 13995128 1 Embodiments of the invention provide a method ... \n","3 14348792 1 A crystal growth furnace comprising a crucible... \n","4 14360978 0 A shoe midsole is composed of a base plate (1)... \n","5 14369795 1 A ratchet tool includes a shaft member, a hand... \n","... ... ... ... \n","16144 15002390 1 A wavelength tunable laser device, including: ... \n","16145 15002391 1 In one aspect, a method for use in preparing a... \n","16148 15002394 1 A robot hand controlling method executes calcu... \n","16149 15002396 0 A fusion protein is disclosed. The fusion prot... \n","16150 15330955 0 A pipe extraction tool that grips the inside o... \n","\n"," claims \n","0 1. A compact optical network terminal, compris... \n","1 1. A method comprising: using a first reader t... \n","3 1. A crystal growth furnace for growing a crys... \n","4 1. A sole member of footwear comprising a base... \n","5 1. A ratchet tool, comprising a shaft member, ... \n","... ... \n","16144 1. A wavelength tunable laser device, comprisi... \n","16145 1. (canceled) 2. The method of claim 19, where... \n","16148 1. A controlling method of a robot hand, the r... \n","16149 1. A fusion protein comprising an Fc fragment ... \n","16150 1. A pipe extraction tool for extracting a pip... 
\n","\n","[8719 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberlabelabstractclaims
0132617481The present invention relates to passive optic...1. A compact optical network terminal, compris...
1139951281Embodiments of the invention provide a method ...1. A method comprising: using a first reader t...
3143487921A crystal growth furnace comprising a crucible...1. A crystal growth furnace for growing a crys...
4143609780A shoe midsole is composed of a base plate (1)...1. A sole member of footwear comprising a base...
5143697951A ratchet tool includes a shaft member, a hand...1. A ratchet tool, comprising a shaft member, ...
...............
16144150023901A wavelength tunable laser device, including: ...1. A wavelength tunable laser device, comprisi...
16145150023911In one aspect, a method for use in preparing a...1. (canceled) 2. The method of claim 19, where...
16148150023941A robot hand controlling method executes calcu...1. A controlling method of a robot hand, the r...
16149150023960A fusion protein is disclosed. The fusion prot...1. A fusion protein comprising an Fc fragment ...
16150153309550A pipe extraction tool that grips the inside o...1. A pipe extraction tool for extracting a pip...
\n","

8719 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":15}],"source":["trainDF3 = trainDF2.rename(columns={'decision': 'label'})\n","trainDF3"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":641},"id":"v3Qvaex7dTzG","executionInfo":{"status":"ok","timestamp":1682021437285,"user_tz":240,"elapsed":6,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"3c644844-db7e-4c3d-da66-afba74a1ca9a"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" patent_number label abstract \\\n","0 13144833 0 Regimen for the treatment of rosacea include t... \n","1 14006524 1 A clamp arrangement includes a pair of bracket... \n","2 14365653 0 A system and method for device action and conf... \n","4 14396367 0 Systems and methods for managing datasets prod... \n","9 14416282 1 A scan driving circuit is provided. The scan d... \n","... ... ... ... \n","9085 15011551 0 The non-rigid gate device as described may be ... \n","9090 15011556 0 The present invention provides an improved unc... \n","9091 15011557 1 A method for detecting a software-race conditi... \n","9092 15011558 1 The present application relates to multi-stage... \n","9093 15011559 1 A paper feeder includes a housing, a driving u... \n","\n"," claims \n","0 1. A treatment regimen comprising: cleansing a... \n","1 1. A clamp arrangement for supporting a fractu... \n","2 1-20. (canceled) 21. A mobile device comprisin... \n","4 1. A method, comprising: executing, by one or ... \n","9 1. A scan driving circuit for driving a scan l... \n","... ... \n","9085 1; A non-rigid blocking apparatus referred to ... \n","9090 1. A method for rendering a plastic surface am... \n","9091 1. A method for detecting a software-race cond... \n","9092 1. A multi-stage amplitude modulation-based me... \n","9093 1. A paper feeder, comprising: a housing; a dr... \n","\n","[4888 rows x 4 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
patent_numberlabelabstractclaims
0131448330Regimen for the treatment of rosacea include t...1. A treatment regimen comprising: cleansing a...
1140065241A clamp arrangement includes a pair of bracket...1. A clamp arrangement for supporting a fractu...
2143656530A system and method for device action and conf...1-20. (canceled) 21. A mobile device comprisin...
4143963670Systems and methods for managing datasets prod...1. A method, comprising: executing, by one or ...
9144162821A scan driving circuit is provided. The scan d...1. A scan driving circuit for driving a scan l...
...............
9085150115510The non-rigid gate device as described may be ...1; A non-rigid blocking apparatus referred to ...
9090150115560The present invention provides an improved unc...1. A method for rendering a plastic surface am...
9091150115571A method for detecting a software-race conditi...1. A method for detecting a software-race cond...
9092150115581The present application relates to multi-stage...1. A multi-stage amplitude modulation-based me...
9093150115591A paper feeder includes a housing, a driving u...1. A paper feeder, comprising: a housing; a dr...
\n","

4888 rows × 4 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":16}],"source":["valDF3 = valDF2.rename(columns={'decision': 'label'})\n","valDF3"]},{"cell_type":"markdown","metadata":{"id":"hJ8DMaCXdTzG"},"source":["We can grab the data for each column so that we have a list of values for training labels, training texts, validation labels, and validation texts.\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"z9omfOd0dTzG"},"outputs":[],"source":["trainData = {\n"," \"patent_numbers\":trainDF3[\"patent_number\"].tolist(),\n"," \"labels\":trainDF3[\"label\"].tolist(),\n"," \"abstracts\":trainDF3[\"abstract\"].tolist(),\n"," \"claims\":trainDF3[\"claims\"].tolist(),\n","}\n","valData = {\n"," \"patent_numbers\":valDF3[\"patent_number\"].tolist(),\n"," \"labels\":valDF3[\"label\"].tolist(),\n"," \"abstracts\":valDF3[\"abstract\"].tolist(),\n"," \"claims\":valDF3[\"claims\"].tolist(),\n","}"]},{"cell_type":"markdown","source":["We will save these dictionaries as data for later."],"metadata":{"id":"CLeEbFI_NBuK"}},{"cell_type":"code","source":["if not os.path.exists(\"./data\"):\n"," os.makedirs('./data')\n","\n","with open(\"./data/train.json\", \"w\") as outfile:\n"," json.dump(trainData, outfile, indent=2)\n","with open(\"./data/val.json\", \"w\") as outfile:\n"," json.dump(valData, outfile, indent=2)"],"metadata":{"id":"NBPNxz7qNHRq"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"pE3HG8bUdTzG"},"source":["## Loading the Trainer\n","\n","Now we can start training! 
This time, we will just go with `distilbert-base-uncased` for simplicity."]},{"cell_type":"markdown","source":["### Initializing Classes and Trainers"],"metadata":{"id":"YklaXlgDO6Jw"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hxE_CIT_dTzH","executionInfo":{"status":"ok","timestamp":1682021471720,"user_tz":240,"elapsed":16542,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"outputId":"758b0092-d56e-47b6-852a-4a19915bfe0c"},"outputs":[{"output_type":"stream","name":"stdout","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: torch in /usr/local/lib/python3.9/dist-packages (2.0.0+cu118)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch) (3.1.2)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch) (4.5.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch) (1.11.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from torch) (3.11.0)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch) (2.0.0)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch) (3.1)\n","Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (16.0.1)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (3.25.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch) (2.1.2)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch) (1.3.0)\n","Looking in indexes: https://pypi.org/simple, 
https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting transformers\n"," Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.0/7.0 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.11.0)\n","Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n"," Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m100.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.27.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (6.0)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.4)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in 
/usr/local/lib/python3.9/dist-packages (from requests->transformers) (3.4)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2022.12.7)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.15)\n","Installing collected packages: tokenizers, transformers\n","Successfully installed tokenizers-0.13.3 transformers-4.28.1\n"]}],"source":["!pip install torch\n","!pip install transformers"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"i8_0Ih_WdTzH"},"outputs":[],"source":["# `torch` itself must be bound: later cells use torch.backends and torch.tensor\n","import torch\n","from torch.utils.data import Dataset, DataLoader\n","from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification\n","from transformers import Trainer, TrainingArguments, AdamW"]},{"cell_type":"code","source":["# Permit TF32 for CUDA matrix multiplications\n","torch.backends.cuda.matmul.allow_tf32 = True\n","model_name = \"distilbert-base-uncased\"\n","upsto_abstracts_model_path = './models/uspto_abstracts'\n","upsto_claims_model_path = './models/uspto_claims'"],"metadata":{"id":"wXkvS5h2NrzW","executionInfo":{"status":"ok","timestamp":1682032710087,"user_tz":240,"elapsed":217,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}}},"execution_count":39,"outputs":[]},{"cell_type":"markdown","source":["We will create a Dataset class for the training."],"metadata":{"id":"awXD1_ltNxPC"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"yVi-Vhb-dTzH"},"outputs":[],"source":["class USPTODataset(Dataset):\n","    \"\"\"Dataset that pairs per-example encodings with their labels.\"\"\"\n","    def __init__(self, encodings, labels):\n","        # encodings: mapping of field name -> indexable per-example values (must expose .items())\n","        # labels: indexable collection with one entry per example\n","        self.encodings = encodings\n","        self.labels = labels\n","    def __getitem__(self, idx):\n","        # One example: every encoding field at position idx, plus its label, as tensors\n","        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n","        item['labels'] = torch.tensor(self.labels[idx])\n","        return item\n","    def __len__(self):\n","        # Dataset size is defined by the number of labels\n","        return len(self.labels)\n"]},{"cell_type":"markdown","source":["### Double-Checking the Data\n","\n","We will do a basic check: 
Do we already have `trainData` and `valData` in memory? If not, we need to load them from the saved JSON files!"],"metadata":{"id":"ZXqCGaTxN7qy"}},{"cell_type":"code","source":["trainDataPath = \"./data/train.json\"\n","valDataPath = \"./data/val.json\"\n","\n","# globals().get() tolerates a fresh kernel where these names were never defined;\n","# a bare `trainData is None` would raise NameError in exactly that situation\n","if globals().get(\"trainData\") is None and os.path.exists(trainDataPath):\n","    with open(trainDataPath) as f:\n","        trainData = json.load(f)\n","if globals().get(\"valData\") is None and os.path.exists(valDataPath):\n","    with open(valDataPath) as f:\n","        valData = json.load(f)"],"metadata":{"id":"8Szn0TJ-N7CI"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["### Encoding the Data"],"metadata":{"id":"V3oKe81RPIgq"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"4Cxzzr6KdTzI","executionInfo":{"status":"ok","timestamp":1682021490811,"user_tz":240,"elapsed":1763,"user":{"displayName":"Ryan Kim","userId":"18356277368138721144"}},"colab":{"base_uri":"https://localhost:8080/","height":257,"referenced_widgets":["37e352aeab994637887c9fce16a4fdda","7ce12e08913445429c0f44752b5f821c","6999d35a0c09459b9d0c9d47dba70320","5dc74e126ba4481e8e15ffa59b1eaf8e","3d9ca529621f46da9ed93641ae56b4ea","d9e649a7a52641b28b77037fc4713d77","3d83814aa933459dac4d493ab6c2ecf7","a08f7de9b7284616a3a6f2176804a714","2f624544ba68401491be11bb78cc8086","e7d6e1e3fb9a49e0b58281aca52517b5","51b1280a3a5e4facbeafb28923d77133","0da194d892754092ad01803ff69c9a7c","1e085de4a50e4c2685b9d24e0f289679","373fdb0d94684d44aa5e0e6293319bc6","c31e597bc5c14d14b287206ea8be2522","acf120e8d7f14a23a7a8a8f6d2c72d54","8bbb84dc028a4b62b1ea4dcd98131706","72125209dca54decaae05e5678a9eb60","659e0520847d4db5a5cf717a7be903b1","7077cb21a4b9491ab20b2af5dd7d30e5","517fdc4c1e61453f9e167dd8cc33f021","055f263ecfab430da77808fdc07699a1","cb4f082d2c384b74a54bac7e92b19772","12f8fa71da0d434a88c43ab13159fbc6","cb2d86cc73fd4a529d75aeb8e9c354ae","28bff5766c51461e8b9456c07aac9c57","8f3f4ca0a7114fb3929b2b80402c19ad","097daf3ff77f4d39809fe3a9d5bbd3c3","e53f41626ff34cbca574ef5be6b910e9","e7a1f0216c184d5e8abee0f499
8f7cb7","ec7f6f10a68f4aa3b1696e4e1d59c231","041087211da7424e86b03574c00bcc7e","44c305d3e3ec44a1ac31a9e82ee00fd5","6cce9c60a7074c40ad9992597eb1f50a","87cad6102054466d8e1243da205cf506","5d1ae7f7479e485a97e80db391b6e694","789cf158a3154bba8b1091b2ec443843","f4d8392b478149949a77bf606fea3090","d457e5284b6e4ecf8efddff65b613315","abff237c84fe446f857de2c7c6fc466c","e34a8a0a27614aab95e63b221861965f","bf607dd1b0ba47c2a4b42cd934786356","f66a864297f1446d92968786100fa6ef","035fd49261424e179b16f2ae4688944e"]},"outputId":"d1afd722-6591-4860-db86-5bb9ffd58e7d"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (…)okenizer_config.json: 0%| | 0.00/28.0 [00:00