diff --git "a/train-orach-chaim.ipynb" "b/train-orach-chaim.ipynb"
new file mode 100644--- /dev/null
+++ "b/train-orach-chaim.ipynb"
@@ -0,0 +1,5814 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+ "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:03.809709Z",
+ "iopub.status.busy": "2023-12-05T04:04:03.808818Z",
+ "iopub.status.idle": "2023-12-05T04:04:21.308742Z",
+ "shell.execute_reply": "2023-12-05T04:04:21.308088Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:03.809666Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.9/dist-packages (4.21.3)\n",
+ "Collecting transformers\n",
+ " Downloading transformers-4.35.2-py3-none-any.whl (7.9 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: datasets in /usr/local/lib/python3.9/dist-packages (2.4.0)\n",
+ "Collecting datasets\n",
+ " Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m51.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting evaluate\n",
+ " Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting accelerate\n",
+ " Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (5.4.1)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)\n",
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.64.1)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.28.2)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.23.4)\n",
+ "Collecting huggingface-hub<1.0,>=0.16.4\n",
+ " Downloading huggingface_hub-0.19.4-py3-none-any.whl (311 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.7/311.7 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting tokenizers<0.19,>=0.14\n",
+ " Downloading tokenizers-0.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m63.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.9.0)\n",
+ "Collecting safetensors>=0.3.1\n",
+ " Downloading safetensors-0.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n",
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.9/dist-packages (from datasets) (3.2.0)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from datasets) (3.8.3)\n",
+ "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (2023.1.0)\n",
+ "Collecting pyarrow-hotfix\n",
+ " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from datasets) (1.5.0)\n",
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.9/dist-packages (from datasets) (0.70.13)\n",
+ "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (0.3.5.1)\n",
+ "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (10.0.1)\n",
+ "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.9/dist-packages (from evaluate) (0.18.0)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.9/dist-packages (from accelerate) (5.9.4)\n",
+ "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.9/dist-packages (from accelerate) (1.12.1+cu116)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (1.8.2)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (18.2.0)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (1.3.3)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (4.0.2)\n",
+ "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (2.1.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (6.0.4)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.4.0)\n",
+ "Collecting huggingface-hub<1.0,>=0.16.4\n",
+ " Downloading huggingface_hub-0.19.3-py3-none-any.whl (311 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m45.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading huggingface_hub-0.19.2-py3-none-any.whl (311 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m49.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading huggingface_hub-0.19.1-py3-none-any.whl (311 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.1/311.1 kB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m33.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m44.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hINFO: pip is looking at multiple versions of aiohttp to determine which version is compatible with other requirements. This could take a while.\n",
+ "Collecting aiohttp\n",
+ " Downloading aiohttp-3.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
+ "\u001b[?25hINFO: pip is looking at multiple versions of fsspec[http] to determine which version is compatible with other requirements. This could take a while.\n",
+ "Collecting fsspec[http]<=2023.10.0,>=2023.1.0\n",
+ " Downloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->transformers) (2.8)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->transformers) (2019.11.28)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.14)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets) (2022.7.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.14.0)\n",
+ "Installing collected packages: safetensors, pyarrow-hotfix, fsspec, huggingface-hub, tokenizers, accelerate, transformers, datasets, evaluate\n",
+ " Attempting uninstall: fsspec\n",
+ " Found existing installation: fsspec 2023.1.0\n",
+ " Uninstalling fsspec-2023.1.0:\n",
+ " Successfully uninstalled fsspec-2023.1.0\n",
+ " Attempting uninstall: huggingface-hub\n",
+ " Found existing installation: huggingface-hub 0.12.0\n",
+ " Uninstalling huggingface-hub-0.12.0:\n",
+ " Successfully uninstalled huggingface-hub-0.12.0\n",
+ " Attempting uninstall: tokenizers\n",
+ " Found existing installation: tokenizers 0.12.1\n",
+ " Uninstalling tokenizers-0.12.1:\n",
+ " Successfully uninstalled tokenizers-0.12.1\n",
+ " Attempting uninstall: transformers\n",
+ " Found existing installation: transformers 4.21.3\n",
+ " Uninstalling transformers-4.21.3:\n",
+ " Successfully uninstalled transformers-4.21.3\n",
+ " Attempting uninstall: datasets\n",
+ " Found existing installation: datasets 2.4.0\n",
+ " Uninstalling datasets-2.4.0:\n",
+ " Successfully uninstalled datasets-2.4.0\n",
+ "Successfully installed accelerate-0.25.0 datasets-2.15.0 evaluate-0.4.1 fsspec-2023.10.0 huggingface-hub-0.19.4 pyarrow-hotfix-0.6 safetensors-0.4.1 tokenizers-0.15.0 transformers-4.35.2\n",
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
+ "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Versions are pinned to the ones this notebook was last executed with so that\n",
+ "# a fresh Restart & Run All resolves the same packages.\n",
+ "%pip install transformers==4.35.2 datasets==2.15.0 evaluate==0.4.1 accelerate==0.25.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:21.310409Z",
+ "iopub.status.busy": "2023-12-05T04:04:21.310130Z",
+ "iopub.status.idle": "2023-12-05T04:04:21.783747Z",
+ "shell.execute_reply": "2023-12-05T04:04:21.783079Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:21.310381Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n",
+ "Token is valid (permission: write).\n",
+ "Your token has been saved to /root/.cache/huggingface/token\n",
+ "Login successful\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "from huggingface_hub import login\n",
+ "\n",
+ "# SECURITY: an access token must never be committed with the notebook.\n",
+ "# The literal token that used to live in this cell is exposed in the repo\n",
+ "# history and must be revoked at https://huggingface.co/settings/tokens.\n",
+ "# Export HF_TOKEN before starting the kernel; when it is unset, login()\n",
+ "# receives None and falls back to its interactive prompt.\n",
+ "login(token=os.environ.get('HF_TOKEN'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:21.785174Z",
+ "iopub.status.busy": "2023-12-05T04:04:21.784648Z",
+ "iopub.status.idle": "2023-12-05T04:04:24.580049Z",
+ "shell.execute_reply": "2023-12-05T04:04:24.579275Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:21.785148Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4332346192424ecba545a41bfa038983",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading readme: 0%| | 0.00/714 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "912c3c39020b435f9d5fd336a73af546",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading data files: 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cde2a96564724b0cb55e049c7d388c0b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading data: 0%| | 0.00/10.9M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9867623f3a48416c9cd09ae76b2ac1fc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Extracting data files: 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8c80f1d324f741799078d136d73950db",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating train split: 0%| | 0/42608 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "# Fetch the corpus by its dataset id; the returned object is bound to `dataset`.\n",
+ "dataset = load_dataset(path='sivan22/orach-chaim')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:24.582217Z",
+ "iopub.status.busy": "2023-12-05T04:04:24.581855Z",
+ "iopub.status.idle": "2023-12-05T04:04:24.588662Z",
+ "shell.execute_reply": "2023-12-05T04:04:24.587648Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:24.582192Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['Unnamed: 0', 'bookname', 'siman', 'sek', 'text', 'seif', 'topic'],\n",
+ " num_rows: 42608\n",
+ " })\n",
+ "})"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:24.592319Z",
+ "iopub.status.busy": "2023-12-05T04:04:24.592074Z",
+ "iopub.status.idle": "2023-12-05T04:04:24.600368Z",
+ "shell.execute_reply": "2023-12-05T04:04:24.599325Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:24.592293Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Unnamed: 0': 24246,\n",
+ " 'bookname': ' משנה ברורה',\n",
+ " 'siman': 'תלא',\n",
+ " 'sek': 'ט',\n",
+ " 'text': ' ואם התחיל ללמוד: וה\"ה שאר מלאכות: ',\n",
+ " 'seif': ' ב',\n",
+ " 'topic': None}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from random import randint\n",
+ "\n",
+ "# randint includes BOTH endpoints, so the upper bound must be len - 1;\n",
+ "# passing len itself would occasionally index one past the end (IndexError).\n",
+ "rnd = randint(0, len(dataset[\"train\"]) - 1)\n",
+ "dataset[\"train\"][rnd]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:24.605031Z",
+ "iopub.status.busy": "2023-12-05T04:04:24.604696Z",
+ "iopub.status.idle": "2023-12-05T04:04:24.704105Z",
+ "shell.execute_reply": "2023-12-05T04:04:24.701926Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:24.605005Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Attach a constant `label` column (all zeros) to the train split.\n",
+ "dataset[\"train\"] = dataset[\"train\"].add_column(\n",
+ "    name=\"label\",\n",
+ "    column=[0] * len(dataset[\"train\"]),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:24.710590Z",
+ "iopub.status.busy": "2023-12-05T04:04:24.710344Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.345669Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.344609Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:24.710566Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " bookname | \n",
+ " siman | \n",
+ " sek | \n",
+ " text | \n",
+ " seif | \n",
+ " topic | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " עטרת זקנים על אורח חיים | \n",
+ " א | \n",
+ " א | \n",
+ " ומיד כשיעור משנתו ואינו רוצ' לישן יטול ידיו אף... | \n",
+ " א | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " עטרת זקנים על אורח חיים | \n",
+ " א | \n",
+ " ב | \n",
+ " המשכים לעסוק בתור' ילבוש עצמו כראוי להכין לקרא... | \n",
+ " ב | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " עטרת זקנים על אורח חיים | \n",
+ " א | \n",
+ " ג | \n",
+ " ראוי. צריך כ\"א להשתת' עם צער' של רחל לבכות בכל... | \n",
+ " ג | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " עטרת זקנים על אורח חיים | \n",
+ " א | \n",
+ " ד | \n",
+ " טוב לומר סדר הקרבנות דוקא ביום רק משנת איזהו מ... | \n",
+ " ה | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " עטרת זקנים על אורח חיים | \n",
+ " א | \n",
+ " ה | \n",
+ " כשיסיים. אחר חטאת ואשם ודאי לא יאמר משום דהאמי... | \n",
+ " ז | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 42603 | \n",
+ " 42603 | \n",
+ " אשל אברהם על שולחן ערוך אורח חיים | \n",
+ " תרצה | \n",
+ " ב | \n",
+ " מלשלם. עיין רא\"ש סוכה דף מ\"ה ות' בי\"ד סי' תל\"ד: | \n",
+ " ב | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42604 | \n",
+ " 42604 | \n",
+ " אשל אברהם על שולחן ערוך אורח חיים | \n",
+ " תרצו | \n",
+ " א | \n",
+ " שלא לעשות. וכשחל בע\"ש מותר לגלח לכבוד שבת לכ\"... | \n",
+ " א | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42605 | \n",
+ " 42605 | \n",
+ " אשל אברהם על שולחן ערוך אורח חיים | \n",
+ " תרצו | \n",
+ " ב | \n",
+ " ותענית. ות\"ח שרי אלא דצריך למיתב תענית לתענית... | \n",
+ " ג | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42606 | \n",
+ " 42606 | \n",
+ " אשל אברהם על שולחן ערוך אורח חיים | \n",
+ " תרצו | \n",
+ " ג | \n",
+ " נוהגים. ע' באר היטב לי\"ד סי' ת\"א וכמ\"ש שם: | \n",
+ " ד | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 42607 | \n",
+ " 42607 | \n",
+ " אשל אברהם על שולחן ערוך אורח חיים | \n",
+ " תרצו | \n",
+ " ד | \n",
+ " במקרא מגילה. ע' ת' בי\"ד סי' תצ\"ו: | \n",
+ " ז | \n",
+ " None | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
42608 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 bookname siman sek \\\n",
+ "0 0 עטרת זקנים על אורח חיים א א \n",
+ "1 1 עטרת זקנים על אורח חיים א ב \n",
+ "2 2 עטרת זקנים על אורח חיים א ג \n",
+ "3 3 עטרת זקנים על אורח חיים א ד \n",
+ "4 4 עטרת זקנים על אורח חיים א ה \n",
+ "... ... ... ... .. \n",
+ "42603 42603 אשל אברהם על שולחן ערוך אורח חיים תרצה ב \n",
+ "42604 42604 אשל אברהם על שולחן ערוך אורח חיים תרצו א \n",
+ "42605 42605 אשל אברהם על שולחן ערוך אורח חיים תרצו ב \n",
+ "42606 42606 אשל אברהם על שולחן ערוך אורח חיים תרצו ג \n",
+ "42607 42607 אשל אברהם על שולחן ערוך אורח חיים תרצו ד \n",
+ "\n",
+ " text seif topic label \n",
+ "0 ומיד כשיעור משנתו ואינו רוצ' לישן יטול ידיו אף... א None 0 \n",
+ "1 המשכים לעסוק בתור' ילבוש עצמו כראוי להכין לקרא... ב None 0 \n",
+ "2 ראוי. צריך כ\"א להשתת' עם צער' של רחל לבכות בכל... ג None 0 \n",
+ "3 טוב לומר סדר הקרבנות דוקא ביום רק משנת איזהו מ... ה None 0 \n",
+ "4 כשיסיים. אחר חטאת ואשם ודאי לא יאמר משום דהאמי... ז None 0 \n",
+ "... ... ... ... ... \n",
+ "42603 מלשלם. עיין רא\"ש סוכה דף מ\"ה ות' בי\"ד סי' תל\"ד: ב None 0 \n",
+ "42604 שלא לעשות. וכשחל בע\"ש מותר לגלח לכבוד שבת לכ\"... א None 0 \n",
+ "42605 ותענית. ות\"ח שרי אלא דצריך למיתב תענית לתענית... ג None 0 \n",
+ "42606 נוהגים. ע' באר היטב לי\"ד סי' ת\"א וכמ\"ש שם: ד None 0 \n",
+ "42607 במקרא מגילה. ע' ת' בי\"ד סי' תצ\"ו: ז None 0 \n",
+ "\n",
+ "[42608 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Materialise the train split as a pandas DataFrame and display it.\n",
+ "df = pd.DataFrame.from_dict(data=dataset['train'])\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.350152Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.349747Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.374612Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.373783Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.350115Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Drop the rows whose text is missing. Assigning `df['text'].dropna()` back to\n",
+ "# the column does nothing: pandas re-aligns on the index and re-inserts NaN for\n",
+ "# every dropped position, so the rows themselves have to be removed instead.\n",
+ "df.dropna(subset=['text'], inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.377959Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.377725Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.387243Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.385891Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.377934Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Coerce every entry of the text column to str.\n",
+ "df['text'] = df['text'].astype(str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.391401Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.391173Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.439179Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.438089Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.391377Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Remove, in place, every row that lacks a siman or a seif value.\n",
+ "df.dropna(how='any', subset=['siman', 'seif'], inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.442995Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.442679Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.457780Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.456539Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.442960Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Combined key: siman and seif joined by a single space.\n",
+ "df['siman_seif'] = df['siman'] + ' ' + df['seif']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.461547Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.461289Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.611254Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.610251Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.461522Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from datasets import Dataset\n",
+ "\n",
+ "# Replace the train split with a Dataset rebuilt from the edited DataFrame.\n",
+ "dataset['train'] = Dataset.from_pandas(df=df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.619670Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.619291Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.641413Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.640389Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.619642Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['א א',\n",
+ " 'א ב',\n",
+ " 'א ג',\n",
+ " 'א ד',\n",
+ " 'א ה',\n",
+ " 'א ו',\n",
+ " 'א ז',\n",
+ " 'א ח',\n",
+ " 'א ט',\n",
+ " 'א א',\n",
+ " 'א ב',\n",
+ " 'א ג',\n",
+ " 'א ד',\n",
+ " 'א ה',\n",
+ " 'א ו',\n",
+ " 'א ז',\n",
+ " 'א ח',\n",
+ " 'א ט',\n",
+ " 'ב א',\n",
+ " 'ב ב',\n",
+ " 'ב ג',\n",
+ " 'ב ד',\n",
+ " 'ב ה',\n",
+ " 'ב ו',\n",
+ " 'ב א',\n",
+ " 'ב ב',\n",
+ " 'ב ג',\n",
+ " 'ב ד',\n",
+ " 'ב ו',\n",
+ " 'ג א',\n",
+ " 'ג ב',\n",
+ " 'ג ג',\n",
+ " 'ג ד',\n",
+ " 'ג ה',\n",
+ " 'ג ו',\n",
+ " 'ג ז',\n",
+ " 'ג ח',\n",
+ " 'ג ט',\n",
+ " 'ג טו',\n",
+ " 'ג טז',\n",
+ " 'ג י',\n",
+ " 'ג יא',\n",
+ " 'ג יב',\n",
+ " 'ג יג',\n",
+ " 'ג יד',\n",
+ " 'ג יז',\n",
+ " 'ג א',\n",
+ " 'ג ב',\n",
+ " 'ג ג',\n",
+ " 'ג ד',\n",
+ " 'ג ה',\n",
+ " 'ג ו',\n",
+ " 'ג ז',\n",
+ " 'ג ח',\n",
+ " 'ג ט',\n",
+ " 'ג טז',\n",
+ " 'ג י',\n",
+ " 'ג יא',\n",
+ " 'ג יב',\n",
+ " 'ג יג',\n",
+ " 'ג יד',\n",
+ " 'ג יז',\n",
+ " 'ד א',\n",
+ " 'ד ב',\n",
+ " 'ד ג',\n",
+ " 'ד ד',\n",
+ " 'ד ה',\n",
+ " 'ד ו',\n",
+ " 'ד ז',\n",
+ " 'ד ח',\n",
+ " 'ד ט',\n",
+ " 'ד טו',\n",
+ " 'ד טז',\n",
+ " 'ד י',\n",
+ " 'ד יא',\n",
+ " 'ד יב',\n",
+ " 'ד יג',\n",
+ " 'ד יד',\n",
+ " 'ד יז',\n",
+ " 'ד יח',\n",
+ " 'ד יט',\n",
+ " 'ד כ',\n",
+ " 'ד כא',\n",
+ " 'ד כב',\n",
+ " 'ד כג',\n",
+ " 'ד א',\n",
+ " 'ד ב',\n",
+ " 'ד ד',\n",
+ " 'ד ז',\n",
+ " 'ד ח',\n",
+ " 'ד טו',\n",
+ " 'ד טז',\n",
+ " 'ד י',\n",
+ " 'ד יא',\n",
+ " 'ד יב',\n",
+ " 'ד יג',\n",
+ " 'ד יד',\n",
+ " 'ד יח',\n",
+ " 'ד כא',\n",
+ " 'ד כב',\n",
+ " 'ד כג',\n",
+ " 'ה א',\n",
+ " 'ה א',\n",
+ " 'ו א',\n",
+ " 'ו ב',\n",
+ " 'ו ג',\n",
+ " 'ו ד',\n",
+ " 'ו א',\n",
+ " 'ו ב',\n",
+ " 'ו ג',\n",
+ " 'ו ד',\n",
+ " 'ז א',\n",
+ " 'ז ב',\n",
+ " 'ז ג',\n",
+ " 'ז ד',\n",
+ " 'ז א',\n",
+ " 'ז ב',\n",
+ " 'ז ג',\n",
+ " 'ח א',\n",
+ " 'ח ב',\n",
+ " 'ח ג',\n",
+ " 'ח ד',\n",
+ " 'ח ה',\n",
+ " 'ח ו',\n",
+ " 'ח ז',\n",
+ " 'ח ח',\n",
+ " 'ח ט',\n",
+ " 'ח טו',\n",
+ " 'ח טז',\n",
+ " 'ח י',\n",
+ " 'ח יא',\n",
+ " 'ח יב',\n",
+ " 'ח יג',\n",
+ " 'ח יד',\n",
+ " 'ח יז',\n",
+ " 'ח א',\n",
+ " 'ח ב',\n",
+ " 'ח ג',\n",
+ " 'ח ד',\n",
+ " 'ח ה',\n",
+ " 'ח ו',\n",
+ " 'ח ז',\n",
+ " 'ח ט',\n",
+ " 'ח טו',\n",
+ " 'ח טז',\n",
+ " 'ח י',\n",
+ " 'ח יא',\n",
+ " 'ח יב',\n",
+ " 'ח יג',\n",
+ " 'ח יד',\n",
+ " 'ח יז',\n",
+ " 'ט א',\n",
+ " 'ט ב',\n",
+ " 'ט ג',\n",
+ " 'ט ד',\n",
+ " 'ט ה',\n",
+ " 'ט ו',\n",
+ " 'ט א',\n",
+ " 'ט ב',\n",
+ " 'ט ג',\n",
+ " 'ט ד',\n",
+ " 'ט ה',\n",
+ " 'ט ו',\n",
+ " 'טו א',\n",
+ " 'טו ב',\n",
+ " 'טו ג',\n",
+ " 'טו ד',\n",
+ " 'טו ה',\n",
+ " 'טו ו',\n",
+ " 'טו א',\n",
+ " 'טו ב',\n",
+ " 'טו ג',\n",
+ " 'טו ד',\n",
+ " 'טו ה',\n",
+ " 'טו ו',\n",
+ " 'טז א',\n",
+ " 'טז א',\n",
+ " 'י א',\n",
+ " 'י ב',\n",
+ " 'י ג',\n",
+ " 'י ד',\n",
+ " 'י ה',\n",
+ " 'י ו',\n",
+ " 'י ז',\n",
+ " 'י ח',\n",
+ " 'י ט',\n",
+ " 'י י',\n",
+ " 'י יא',\n",
+ " 'י יב',\n",
+ " 'י א',\n",
+ " 'י ב',\n",
+ " 'י ג',\n",
+ " 'י ד',\n",
+ " 'י ה',\n",
+ " 'י ו',\n",
+ " 'י ז',\n",
+ " 'י ח',\n",
+ " 'י יא',\n",
+ " 'י יב',\n",
+ " 'יא א',\n",
+ " 'יא ב',\n",
+ " 'יא ג',\n",
+ " 'יא ד',\n",
+ " 'יא ה',\n",
+ " 'יא ו',\n",
+ " 'יא ז',\n",
+ " 'יא ח',\n",
+ " 'יא ט',\n",
+ " 'יא טו',\n",
+ " 'יא י',\n",
+ " 'יא יא',\n",
+ " 'יא יב',\n",
+ " 'יא יג',\n",
+ " 'יא יד',\n",
+ " 'יא א',\n",
+ " 'יא ב',\n",
+ " 'יא ג',\n",
+ " 'יא ד',\n",
+ " 'יא ה',\n",
+ " 'יא ו',\n",
+ " 'יא ז',\n",
+ " 'יא ח',\n",
+ " 'יא ט',\n",
+ " 'יא טו',\n",
+ " 'יא י',\n",
+ " 'יא יא',\n",
+ " 'יא יב',\n",
+ " 'יא יג',\n",
+ " 'יא יד',\n",
+ " 'יב א',\n",
+ " 'יב ב',\n",
+ " 'יב ג',\n",
+ " 'יב א',\n",
+ " 'יב ב',\n",
+ " 'יב ג',\n",
+ " 'יג א',\n",
+ " 'יג ב',\n",
+ " 'יג ג',\n",
+ " 'יג א',\n",
+ " 'יג ב',\n",
+ " 'יג ג',\n",
+ " 'יד א',\n",
+ " 'יד ב',\n",
+ " 'יד ג',\n",
+ " 'יד ד',\n",
+ " 'יד ה',\n",
+ " 'יד א',\n",
+ " 'יד ב',\n",
+ " 'יד ג',\n",
+ " 'יד ד',\n",
+ " 'יז א',\n",
+ " 'יז ב',\n",
+ " 'יז ג',\n",
+ " 'יז א',\n",
+ " 'יז ב',\n",
+ " 'יז ג',\n",
+ " 'יח א',\n",
+ " 'יח ב',\n",
+ " 'יח ג',\n",
+ " 'יח א',\n",
+ " 'יח ב',\n",
+ " 'יח ג',\n",
+ " 'יט א',\n",
+ " 'יט ב',\n",
+ " 'יט א',\n",
+ " 'יט ב',\n",
+ " 'כ א',\n",
+ " 'כ ב',\n",
+ " 'כ א',\n",
+ " 'כ ב',\n",
+ " 'כא א',\n",
+ " 'כא ב',\n",
+ " 'כא ג',\n",
+ " 'כא ד',\n",
+ " 'כא א',\n",
+ " 'כא ג',\n",
+ " 'כא ד',\n",
+ " 'כב א',\n",
+ " 'כב א',\n",
+ " 'כג א',\n",
+ " 'כג ב',\n",
+ " 'כג ג',\n",
+ " 'כג ד',\n",
+ " 'כג א',\n",
+ " 'כג ב',\n",
+ " 'כג ג',\n",
+ " 'כג ד',\n",
+ " 'כד א',\n",
+ " 'כד ב',\n",
+ " 'כד ג',\n",
+ " 'כד ד',\n",
+ " 'כד ה',\n",
+ " 'כד ו',\n",
+ " 'כד א',\n",
+ " 'כד ב',\n",
+ " 'כד ד',\n",
+ " 'כד ה',\n",
+ " 'כד ו',\n",
+ " 'כה א',\n",
+ " 'כה ב',\n",
+ " 'כה ג',\n",
+ " 'כה ד',\n",
+ " 'כה ה',\n",
+ " 'כה ו',\n",
+ " 'כה ז',\n",
+ " 'כה ח',\n",
+ " 'כה ט',\n",
+ " 'כה י',\n",
+ " 'כה יא',\n",
+ " 'כה יב',\n",
+ " 'כה יג',\n",
+ " 'כה א',\n",
+ " 'כה ב',\n",
+ " 'כה ג',\n",
+ " 'כה ה',\n",
+ " 'כה ו',\n",
+ " 'כה ז',\n",
+ " 'כה ח',\n",
+ " 'כה ט',\n",
+ " 'כה י',\n",
+ " 'כה יא',\n",
+ " 'כה יב',\n",
+ " 'כה יג',\n",
+ " 'כו א',\n",
+ " 'כו ב',\n",
+ " 'כו א',\n",
+ " 'כז א',\n",
+ " 'כז ב',\n",
+ " 'כז ג',\n",
+ " 'כז ד',\n",
+ " 'כז ה',\n",
+ " 'כז ו',\n",
+ " 'כז ז',\n",
+ " 'כז ח',\n",
+ " 'כז ט',\n",
+ " 'כז י',\n",
+ " 'כז יא',\n",
+ " 'כז א',\n",
+ " 'כז ב',\n",
+ " 'כז ד',\n",
+ " 'כז ה',\n",
+ " 'כז ו',\n",
+ " 'כז ז',\n",
+ " 'כז ח',\n",
+ " 'כז ט',\n",
+ " 'כז י',\n",
+ " 'כז יא',\n",
+ " 'כח א',\n",
+ " 'כח ב',\n",
+ " 'כח ג',\n",
+ " 'כח א',\n",
+ " 'כח ב',\n",
+ " 'כט א',\n",
+ " 'כט א',\n",
+ " 'ל א',\n",
+ " 'ל ב',\n",
+ " 'ל ג',\n",
+ " 'ל ד',\n",
+ " 'ל ה',\n",
+ " 'ל א',\n",
+ " 'ל ב',\n",
+ " 'ל ג',\n",
+ " 'ל ד',\n",
+ " 'ל ה',\n",
+ " 'לא א',\n",
+ " 'לא ב',\n",
+ " 'לא א',\n",
+ " 'לא ב',\n",
+ " 'לב א',\n",
+ " 'לב ב',\n",
+ " 'לב ג',\n",
+ " 'לב ד',\n",
+ " 'לב ה',\n",
+ " 'לב ו',\n",
+ " 'לב ז',\n",
+ " 'לב ח',\n",
+ " 'לב ט',\n",
+ " 'לב טו',\n",
+ " 'לב טז',\n",
+ " 'לב י',\n",
+ " 'לב יא',\n",
+ " 'לב יב',\n",
+ " 'לב יג',\n",
+ " 'לב יד',\n",
+ " 'לב יז',\n",
+ " 'לב יח',\n",
+ " 'לב יט',\n",
+ " 'לב כ',\n",
+ " 'לב כא',\n",
+ " 'לב כב',\n",
+ " 'לב כג',\n",
+ " 'לב כד',\n",
+ " 'לב כה',\n",
+ " 'לב כו',\n",
+ " 'לב כז',\n",
+ " 'לב כח',\n",
+ " 'לב כט',\n",
+ " 'לב ל',\n",
+ " 'לב לא',\n",
+ " 'לב לב',\n",
+ " 'לב לג',\n",
+ " 'לב לד',\n",
+ " 'לב לה',\n",
+ " 'לב לו',\n",
+ " 'לב לז',\n",
+ " 'לב לח',\n",
+ " 'לב לט',\n",
+ " 'לב מ',\n",
+ " 'לב מא',\n",
+ " 'לב מב',\n",
+ " 'לב מג',\n",
+ " 'לב מד',\n",
+ " 'לב מה',\n",
+ " 'לב מו',\n",
+ " 'לב מז',\n",
+ " 'לב מח',\n",
+ " 'לב מט',\n",
+ " 'לב נ',\n",
+ " 'לב נא',\n",
+ " 'לב נב',\n",
+ " 'לב א',\n",
+ " 'לב ב',\n",
+ " 'לב ג',\n",
+ " 'לב ד',\n",
+ " 'לב ה',\n",
+ " 'לב ו',\n",
+ " 'לב ז',\n",
+ " 'לב ח',\n",
+ " 'לב ט',\n",
+ " 'לב טו',\n",
+ " 'לב טז',\n",
+ " 'לב י',\n",
+ " 'לב יא',\n",
+ " 'לב יב',\n",
+ " 'לב יג',\n",
+ " 'לב יד',\n",
+ " 'לב יז',\n",
+ " 'לב יח',\n",
+ " 'לב יט',\n",
+ " 'לב כ',\n",
+ " 'לב כג',\n",
+ " 'לב כד',\n",
+ " 'לב כה',\n",
+ " 'לב כז',\n",
+ " 'לב כח',\n",
+ " 'לב כט',\n",
+ " 'לב לא',\n",
+ " 'לב לב',\n",
+ " 'לב לג',\n",
+ " 'לב לה',\n",
+ " 'לב לו',\n",
+ " 'לב לז',\n",
+ " 'לב לח',\n",
+ " 'לב לט',\n",
+ " 'לב מ',\n",
+ " 'לב מא',\n",
+ " 'לב מב',\n",
+ " 'לב מד',\n",
+ " 'לב מה',\n",
+ " 'לב מז',\n",
+ " 'לב מט',\n",
+ " 'לב נ',\n",
+ " 'לב נא',\n",
+ " 'לב נב',\n",
+ " 'לג א',\n",
+ " 'לג ב',\n",
+ " 'לג ג',\n",
+ " 'לג ד',\n",
+ " 'לג ה',\n",
+ " 'לג א',\n",
+ " 'לג ג',\n",
+ " 'לג ד',\n",
+ " 'לג ה',\n",
+ " 'לד א',\n",
+ " 'לד ב',\n",
+ " 'לד ג',\n",
+ " 'לד ד',\n",
+ " 'לד א',\n",
+ " 'לד ב',\n",
+ " 'לד ג',\n",
+ " 'לד ד',\n",
+ " 'לה א',\n",
+ " 'לה ד',\n",
+ " 'לה א',\n",
+ " 'לו א',\n",
+ " 'לו ב',\n",
+ " 'לו ג',\n",
+ " 'לו א',\n",
+ " 'לו ב',\n",
+ " 'לו ג',\n",
+ " 'לז א',\n",
+ " 'לז ב',\n",
+ " 'לז ג',\n",
+ " 'לז א',\n",
+ " 'לז ב',\n",
+ " 'לז ג',\n",
+ " 'לח א',\n",
+ " 'לח ב',\n",
+ " 'לח ג',\n",
+ " 'לח ד',\n",
+ " 'לח ה',\n",
+ " 'לח ו',\n",
+ " 'לח ז',\n",
+ " 'לח ח',\n",
+ " 'לח ט',\n",
+ " 'לח י',\n",
+ " 'לח יא',\n",
+ " 'לח יב',\n",
+ " 'לח יג',\n",
+ " 'לח א',\n",
+ " 'לח ב',\n",
+ " 'לח ג',\n",
+ " 'לח ד',\n",
+ " 'לח ה',\n",
+ " 'לח ז',\n",
+ " 'לח ח',\n",
+ " 'לח ט',\n",
+ " 'לח י',\n",
+ " 'לח יא',\n",
+ " 'לח יב',\n",
+ " 'לט א',\n",
+ " 'לט ב',\n",
+ " 'לט ג',\n",
+ " 'לט ד',\n",
+ " 'לט ה',\n",
+ " 'לט ו',\n",
+ " 'לט ז',\n",
+ " 'לט ח',\n",
+ " 'לט ט',\n",
+ " 'לט י',\n",
+ " 'לט א',\n",
+ " 'לט ב',\n",
+ " 'לט ג',\n",
+ " 'לט ד',\n",
+ " 'לט ו',\n",
+ " 'לט ז',\n",
+ " 'לט ח',\n",
+ " 'לט ט',\n",
+ " 'לט י',\n",
+ " 'מ א',\n",
+ " 'מ ב',\n",
+ " 'מ ג',\n",
+ " 'מ ד',\n",
+ " 'מ ה',\n",
+ " 'מ ו',\n",
+ " 'מ ז',\n",
+ " 'מ ח',\n",
+ " 'מ א',\n",
+ " 'מ ב',\n",
+ " 'מ ג',\n",
+ " 'מ ד',\n",
+ " 'מ ו',\n",
+ " 'מ ז',\n",
+ " 'מא א',\n",
+ " 'מא א',\n",
+ " 'מב א',\n",
+ " 'מב ב',\n",
+ " 'מב ג',\n",
+ " 'מב א',\n",
+ " 'מב ג',\n",
+ " 'מג א',\n",
+ " 'מג ב',\n",
+ " 'מג ג',\n",
+ " 'מג ד',\n",
+ " 'מג ה',\n",
+ " 'מג ו',\n",
+ " 'מג ז',\n",
+ " 'מג ח',\n",
+ " 'מג ט',\n",
+ " 'מג א',\n",
+ " 'מג ב',\n",
+ " 'מג ג',\n",
+ " 'מג ד',\n",
+ " 'מג ה',\n",
+ " 'מג ו',\n",
+ " 'מג ז',\n",
+ " 'מג ח',\n",
+ " 'מד א',\n",
+ " 'מד א',\n",
+ " 'מה א',\n",
+ " 'מה ב',\n",
+ " 'מה א',\n",
+ " 'מה ב',\n",
+ " 'מו א',\n",
+ " 'מו ב',\n",
+ " 'מו ג',\n",
+ " 'מו ד',\n",
+ " 'מו ה',\n",
+ " 'מו ו',\n",
+ " 'מו ז',\n",
+ " 'מו ח',\n",
+ " 'מו ט',\n",
+ " 'מו א',\n",
+ " 'מו ב',\n",
+ " 'מו ג',\n",
+ " 'מו ד',\n",
+ " 'מו ה',\n",
+ " 'מו ו',\n",
+ " 'מו ז',\n",
+ " 'מו ח',\n",
+ " 'מו ט',\n",
+ " 'מז א',\n",
+ " 'מז ב',\n",
+ " 'מז ג',\n",
+ " 'מז ד',\n",
+ " 'מז ה',\n",
+ " 'מז ו',\n",
+ " 'מז ז',\n",
+ " 'מז ח',\n",
+ " 'מז ט',\n",
+ " 'מז י',\n",
+ " 'מז יא',\n",
+ " 'מז יב',\n",
+ " 'מז יג',\n",
+ " 'מז יד',\n",
+ " 'מז א',\n",
+ " 'מז ג',\n",
+ " 'מז ד',\n",
+ " 'מז ה',\n",
+ " 'מז ו',\n",
+ " 'מז ז',\n",
+ " 'מז ח',\n",
+ " 'מז ט',\n",
+ " 'מז י',\n",
+ " 'מז יא',\n",
+ " 'מז יב',\n",
+ " 'מז יג',\n",
+ " 'מז יד',\n",
+ " 'מח *',\n",
+ " 'מח א',\n",
+ " 'מח יד',\n",
+ " 'מח א',\n",
+ " 'מט א',\n",
+ " 'מט א',\n",
+ " 'נ א',\n",
+ " 'נ א',\n",
+ " 'נא א',\n",
+ " 'נא ב',\n",
+ " 'נא ג',\n",
+ " 'נא ד',\n",
+ " 'נא ה',\n",
+ " 'נא ו',\n",
+ " 'נא ז',\n",
+ " 'נא ח',\n",
+ " 'נא ט',\n",
+ " 'נא א',\n",
+ " 'נא ב',\n",
+ " 'נא ג',\n",
+ " 'נא ד',\n",
+ " 'נא ה',\n",
+ " 'נא ו',\n",
+ " 'נא ז',\n",
+ " 'נא ט',\n",
+ " 'נב א',\n",
+ " 'נב א',\n",
+ " 'נג א',\n",
+ " 'נג ב',\n",
+ " 'נג ג',\n",
+ " 'נג ד',\n",
+ " 'נג ה',\n",
+ " 'נג ו',\n",
+ " 'נג ז',\n",
+ " 'נג ח',\n",
+ " 'נג ט',\n",
+ " 'נג טו',\n",
+ " 'נג טז',\n",
+ " 'נג י',\n",
+ " 'נג יא',\n",
+ " 'נג יב',\n",
+ " 'נג יג',\n",
+ " 'נג יד',\n",
+ " 'נג יז',\n",
+ " 'נג יח',\n",
+ " 'נג יט',\n",
+ " 'נג כ',\n",
+ " 'נג כא',\n",
+ " 'נג כב',\n",
+ " 'נג כג',\n",
+ " 'נג כד',\n",
+ " 'נג כה',\n",
+ " 'נג כו',\n",
+ " 'נג א',\n",
+ " 'נג ב',\n",
+ " 'נג ג',\n",
+ " 'נג ד',\n",
+ " 'נג ה',\n",
+ " 'נג ו',\n",
+ " 'נג ט',\n",
+ " 'נג טז',\n",
+ " 'נג י',\n",
+ " 'נג יא',\n",
+ " 'נג יב',\n",
+ " 'נג יד',\n",
+ " 'נג יח',\n",
+ " 'נג יט',\n",
+ " 'נג כ',\n",
+ " 'נג כא',\n",
+ " 'נג כב',\n",
+ " 'נג כג',\n",
+ " 'נג כה',\n",
+ " 'נג כו',\n",
+ " 'נד א',\n",
+ " 'נד ב',\n",
+ " 'נד ג',\n",
+ " 'נד א',\n",
+ " 'נד ב',\n",
+ " 'נד ג',\n",
+ " 'נה א',\n",
+ " 'נה ב',\n",
+ " 'נה ג',\n",
+ " 'נה ד',\n",
+ " 'נה ה',\n",
+ " 'נה ו',\n",
+ " 'נה ז',\n",
+ " 'נה ח',\n",
+ " 'נה ט',\n",
+ " 'נה טו',\n",
+ " 'נה טז',\n",
+ " 'נה י',\n",
+ " 'נה יא',\n",
+ " 'נה יב',\n",
+ " 'נה יג',\n",
+ " 'נה יד',\n",
+ " 'נה יז',\n",
+ " 'נה יח',\n",
+ " 'נה יט',\n",
+ " 'נה כ',\n",
+ " 'נה כא',\n",
+ " 'נה כב',\n",
+ " 'נה א',\n",
+ " 'נה ב',\n",
+ " 'נה ג',\n",
+ " 'נה ד',\n",
+ " 'נה ה',\n",
+ " 'נה ו',\n",
+ " 'נה ז',\n",
+ " 'נה טז',\n",
+ " 'נה י',\n",
+ " 'נה יג',\n",
+ " 'נה יד',\n",
+ " 'נה יט',\n",
+ " 'נה כ',\n",
+ " 'נה כא',\n",
+ " 'נה כב',\n",
+ " 'נו א',\n",
+ " 'נו ב',\n",
+ " 'נו ג',\n",
+ " 'נו ד',\n",
+ " 'נו ה',\n",
+ " 'נו א',\n",
+ " 'נו ב',\n",
+ " 'נו ג',\n",
+ " 'נז א',\n",
+ " 'נז ב',\n",
+ " 'נז א',\n",
+ " 'נח א',\n",
+ " 'נח ב',\n",
+ " 'נח ג',\n",
+ " 'נח ד',\n",
+ " 'נח ה',\n",
+ " 'נח ו',\n",
+ " 'נח ז',\n",
+ " 'נח א',\n",
+ " 'נח ב',\n",
+ " 'נח ג',\n",
+ " 'נח ד',\n",
+ " 'נח ה',\n",
+ " 'נח ו',\n",
+ " 'נח ז',\n",
+ " 'נט א',\n",
+ " 'נט ב',\n",
+ " 'נט ג',\n",
+ " 'נט ד',\n",
+ " 'נט ה',\n",
+ " 'נט א',\n",
+ " 'נט ב',\n",
+ " 'נט ג',\n",
+ " 'נט ד',\n",
+ " 'נט ה',\n",
+ " 'ס א',\n",
+ " 'ס ב',\n",
+ " 'ס ג',\n",
+ " 'ס ד',\n",
+ " 'ס ה',\n",
+ " 'ס א',\n",
+ " 'ס ב',\n",
+ " 'ס ג',\n",
+ " 'ס ד',\n",
+ " 'ס ה',\n",
+ " 'סא א',\n",
+ " 'סא ב',\n",
+ " 'סא ג',\n",
+ " 'סא ד',\n",
+ " 'סא ה',\n",
+ " 'סא ו',\n",
+ " 'סא ז',\n",
+ " 'סא ח',\n",
+ " 'סא ט',\n",
+ " 'סא טו',\n",
+ " 'סא טז',\n",
+ " 'סא י',\n",
+ " 'סא יא',\n",
+ " 'סא יב',\n",
+ " 'סא יג',\n",
+ " 'סא יד',\n",
+ " 'סא יז',\n",
+ " 'סא יח',\n",
+ " 'סא יט',\n",
+ " 'סא כ',\n",
+ " 'סא כא',\n",
+ " 'סא כב',\n",
+ " 'סא כג',\n",
+ " 'סא כד',\n",
+ " 'סא כה',\n",
+ " 'סא כו',\n",
+ " 'סא א',\n",
+ " 'סא ג',\n",
+ " 'סא ה',\n",
+ " 'סא ו',\n",
+ " 'סא ז',\n",
+ " 'סא ט',\n",
+ " 'סא י',\n",
+ " 'סא יא',\n",
+ " 'סא יב',\n",
+ " 'סא יג',\n",
+ " 'סא יד',\n",
+ " 'סא כ',\n",
+ " 'סא כא',\n",
+ " 'סא כג',\n",
+ " 'סא כד',\n",
+ " 'סא כה',\n",
+ " 'סא כו',\n",
+ " 'סב א',\n",
+ " 'סב ב',\n",
+ " 'סב ג',\n",
+ " 'סב ד',\n",
+ " 'סב ה',\n",
+ " 'סב א',\n",
+ " 'סב ב',\n",
+ " 'סב ד',\n",
+ " 'סב ה',\n",
+ " 'סג א',\n",
+ " 'סג ב',\n",
+ " 'סג ג',\n",
+ " 'סג ד',\n",
+ " 'סג ה',\n",
+ " 'סג ו',\n",
+ " 'סג ז',\n",
+ " 'סג ח',\n",
+ " 'סג ט',\n",
+ " 'סג א',\n",
+ " 'סג ב',\n",
+ " 'סג ג',\n",
+ " 'סג ד',\n",
+ " 'סג ה',\n",
+ " 'סג ו',\n",
+ " 'סד א',\n",
+ " 'סד ב',\n",
+ " 'סד ג',\n",
+ " 'סד ד',\n",
+ " 'סד ב',\n",
+ " 'סד ד',\n",
+ " 'סה א',\n",
+ " 'סה ב',\n",
+ " 'סה ג',\n",
+ " 'סה א',\n",
+ " 'סה ב',\n",
+ " 'סו א',\n",
+ " 'סו ב',\n",
+ " 'סו ג',\n",
+ " 'סו ד',\n",
+ " 'סו ה',\n",
+ " 'סו ו',\n",
+ " 'סו ז',\n",
+ " 'סו ח',\n",
+ " 'סו ט',\n",
+ " 'סו י',\n",
+ " 'סו א',\n",
+ " 'סו ב',\n",
+ " 'סו ג',\n",
+ " 'סו ד',\n",
+ " 'סו ה',\n",
+ " 'סו ז',\n",
+ " 'סו ח',\n",
+ " 'סו י',\n",
+ " 'סז א',\n",
+ " 'סז א',\n",
+ " 'סח א',\n",
+ " 'סח א',\n",
+ " 'סט א',\n",
+ " 'סט ב',\n",
+ " 'סט א',\n",
+ " 'סט ב',\n",
+ " 'ע א',\n",
+ " 'ע ב',\n",
+ " 'ע ג',\n",
+ " 'ע ד',\n",
+ " 'ע ה',\n",
+ " 'ע א',\n",
+ " 'ע ג',\n",
+ " 'ע ד',\n",
+ " 'ע ה',\n",
+ " 'עא א',\n",
+ " 'עא ב',\n",
+ " 'עא ג',\n",
+ " 'עא ד',\n",
+ " 'עא ה',\n",
+ " 'עא ו',\n",
+ " 'עא ז',\n",
+ " 'עא א',\n",
+ " 'עא ב',\n",
+ " 'עא ז',\n",
+ " 'עב א',\n",
+ " 'עב ב',\n",
+ " 'עב ג',\n",
+ " 'עב ד',\n",
+ " 'עב ה',\n",
+ " 'עב א',\n",
+ " 'עב ב',\n",
+ " 'עב ג',\n",
+ " 'עב ד',\n",
+ " 'עב ה',\n",
+ " 'עג א',\n",
+ " 'עג ב',\n",
+ " 'עג ג',\n",
+ " 'עג ד',\n",
+ " 'עג א',\n",
+ " 'עג ב',\n",
+ " 'עג ג',\n",
+ " 'עג ד',\n",
+ " 'עד א',\n",
+ " 'עד ב',\n",
+ " 'עד ג',\n",
+ " 'עד ד',\n",
+ " 'עד ה',\n",
+ " 'עד ו',\n",
+ " 'עד א',\n",
+ " 'עד ב',\n",
+ " 'עד ג',\n",
+ " 'עד ד',\n",
+ " 'עד ה',\n",
+ " 'עד ו',\n",
+ " 'עה א',\n",
+ " 'עה ב',\n",
+ " 'עה ג',\n",
+ " 'עה ד',\n",
+ " 'עה ה',\n",
+ " 'עה ו',\n",
+ " 'עה א',\n",
+ " 'עה ב',\n",
+ " 'עה ג',\n",
+ " 'עה ד',\n",
+ " 'עה ו',\n",
+ " 'עו א',\n",
+ " 'עו ב',\n",
+ " 'עו ג',\n",
+ " 'עו ד',\n",
+ " 'עו ה',\n",
+ " 'עו ו',\n",
+ " 'עו ז',\n",
+ " 'עו ח',\n",
+ " 'עו ב',\n",
+ " 'עו ג',\n",
+ " 'עו ד',\n",
+ " 'עו ה',\n",
+ " 'עו ו',\n",
+ " 'עו ז',\n",
+ " 'עו ח',\n",
+ " 'עז א',\n",
+ " 'עז ב',\n",
+ " 'עז ב',\n",
+ " 'עח א',\n",
+ " 'עח א',\n",
+ " 'עט א',\n",
+ " 'עט ב',\n",
+ " 'עט ג',\n",
+ " 'עט ד',\n",
+ " 'עט ה',\n",
+ " 'עט ו',\n",
+ " 'עט ז',\n",
+ " 'עט ח',\n",
+ " 'עט ט',\n",
+ " 'עט א',\n",
+ " 'עט ב',\n",
+ " 'עט ג',\n",
+ " 'עט ה',\n",
+ " 'עט ו',\n",
+ " 'עט ז',\n",
+ " 'עט ט',\n",
+ " 'פ א',\n",
+ " 'פ א',\n",
+ " 'פא א',\n",
+ " 'פא ב',\n",
+ " 'פא ��',\n",
+ " 'פא ב',\n",
+ " 'פב א',\n",
+ " 'פב ב',\n",
+ " 'פב א',\n",
+ " 'פב ב',\n",
+ " 'פג א',\n",
+ " ...]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Label vocabulary: every distinct value of the \"siman_seif\" column, sorted.\n",
+    "# For book classification, build the list from \"bookname\" instead:\n",
+    "#   topics = df[\"bookname\"].unique().tolist()\n",
+    "# NOTE(review): the rendered list contains entries that look identical (the same\n",
+    "# siman/seif shows up in two separate sorted runs); presumably they differ in\n",
+    "# invisible characters - confirm before treating them as distinct classes.\n",
+    "simanim = sorted(df[\"siman_seif\"].unique().tolist())\n",
+    "simanim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.645806Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.645511Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.923491Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.922623Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.645771Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f8e912baca3a40fabbf93813f1dd6455",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Casting the dataset: 0%| | 0/42608 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "from datasets import ClassLabel\n",
+    "\n",
+    "# Re-declare the `label` feature as a ClassLabel whose names are the entries of\n",
+    "# `simanim`, then cast the train split to the updated schema.\n",
+    "new_features = dataset[\"train\"].features.copy()\n",
+    "new_features[\"label\"] = ClassLabel(names=simanim)\n",
+    "dataset[\"train\"] = dataset[\"train\"].cast(features=new_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.926964Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.926718Z",
+ "iopub.status.idle": "2023-12-05T04:04:28.931514Z",
+ "shell.execute_reply": "2023-12-05T04:04:28.930643Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.926938Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "# Built once so that each row costs a dict lookup instead of a linear scan of `simanim`.\n",
+    "siman_to_id = {}\n",
+    "for idx, name in enumerate(simanim):\n",
+    "    siman_to_id.setdefault(name, idx)  # first occurrence wins, like list.index\n",
+    "\n",
+    "\n",
+    "def category(example):\n",
+    "    \"\"\"Store the integer class id of ``example['siman_seif']`` in ``example['label']``.\n",
+    "\n",
+    "    The id is the position of the string in ``simanim``. Strings missing from the\n",
+    "    prebuilt ``siman_to_id`` table fall back to ``simanim.index`` so an unknown\n",
+    "    label still raises ``ValueError``.\n",
+    "\n",
+    "    Returns the same ``example`` mapping with ``label`` set.\n",
+    "    \"\"\"\n",
+    "    key = example['siman_seif']\n",
+    "    label_id = siman_to_id.get(key)\n",
+    "    if label_id is None:\n",
+    "        # Not in the table: defer to the list itself (raises ValueError if absent).\n",
+    "        label_id = simanim.index(key)\n",
+    "    example['label'] = label_id\n",
+    "    return example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "_kg_hide-output": true,
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:28.935553Z",
+ "iopub.status.busy": "2023-12-05T04:04:28.935239Z",
+ "iopub.status.idle": "2023-12-05T04:04:38.447363Z",
+ "shell.execute_reply": "2023-12-05T04:04:38.446439Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:28.935519Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c23e89b01a44489cb36c962b74a50865",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/42608 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# Run `category` over every row so that `label` holds the integer class id.\n",
+    "dataset = dataset.map(function=category)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:38.451272Z",
+ "iopub.status.busy": "2023-12-05T04:04:38.450997Z",
+ "iopub.status.idle": "2023-12-05T04:04:38.500772Z",
+ "shell.execute_reply": "2023-12-05T04:04:38.499972Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:38.451237Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "# Drop the bookkeeping/metadata columns, leaving only `text` and `label`.\n",
+    "dataset = dataset.remove_columns(\n",
+    "    column_names=[\n",
+    "        'Unnamed: 0',\n",
+    "        'bookname',\n",
+    "        'topic',\n",
+    "        'sek',\n",
+    "        'seif',\n",
+    "        'siman',\n",
+    "        'siman_seif',\n",
+    "    ]\n",
+    ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:38.504802Z",
+ "iopub.status.busy": "2023-12-05T04:04:38.504470Z",
+ "iopub.status.idle": "2023-12-05T04:04:38.512167Z",
+ "shell.execute_reply": "2023-12-05T04:04:38.511182Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:38.504767Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['text', 'label'],\n",
+ " num_rows: 42608\n",
+ " })\n",
+ "})"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:38.517767Z",
+ "iopub.status.busy": "2023-12-05T04:04:38.517458Z",
+ "iopub.status.idle": "2023-12-05T04:04:38.639195Z",
+ "shell.execute_reply": "2023-12-05T04:04:38.638328Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:38.517740Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "# Hold out 10% of the rows for evaluation. The fixed seed keeps the split identical\n",
+    "# across Restart & Run All; unseeded, every run would evaluate on different rows.\n",
+    "dataset = dataset[\"train\"].train_test_split(test_size=0.1, seed=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:38.642698Z",
+ "iopub.status.busy": "2023-12-05T04:04:38.642425Z",
+ "iopub.status.idle": "2023-12-05T04:04:38.782300Z",
+ "shell.execute_reply": "2023-12-05T04:04:38.781301Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:38.642673Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "# Seeded so the row order is reproducible. `train_test_split` already shuffles by\n",
+    "# default, so this only re-orders rows inside each split.\n",
+    "dataset = dataset.shuffle(seed=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:38.786733Z",
+ "iopub.status.busy": "2023-12-05T04:04:38.786376Z",
+ "iopub.status.idle": "2023-12-05T04:04:50.222441Z",
+ "shell.execute_reply": "2023-12-05T04:04:50.221289Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:38.786693Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "78592d67b56f43b681f318af4246e48b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/1.29k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ba522a5841de4ba1ab094822fa5d0b9f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "vocab.txt: 0%| | 0.00/1.46M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c05f0fac56104d2ebd018a95e15d3088",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/3.54M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "05265e2ce8214f6b92f17f126a08b223",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/695 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "52023f5881334c85871b20a414b9712b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "config.json: 0%| | 0.00/633k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9a5014441a524abe92afcf29c1caa455",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/761M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of BertForMaskedLM were not initialized from the model checkpoint at sivan22/halacha-siman-seif-classifier and are newly initialized: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.bias']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "BertForMaskedLM(\n",
+ " (bert): BertModel(\n",
+ " (embeddings): BertEmbeddings(\n",
+ " (word_embeddings): Embedding(128000, 768, padding_idx=0)\n",
+ " (position_embeddings): Embedding(512, 768)\n",
+ " (token_type_embeddings): Embedding(2, 768)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (encoder): BertEncoder(\n",
+ " (layer): ModuleList(\n",
+ " (0): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (1): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (2): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (3): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (4): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (5): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (6): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (7): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (8): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (9): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (10): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (11): BertLayer(\n",
+ " (attention): BertAttention(\n",
+ " (self): BertSelfAttention(\n",
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " (output): BertSelfOutput(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " (intermediate): BertIntermediate(\n",
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+ " (intermediate_act_fn): GELUActivation()\n",
+ " )\n",
+ " (output): BertOutput(\n",
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (cls): BertOnlyMLMHead(\n",
+ " (predictions): BertLMPredictionHead(\n",
+ " (transform): BertPredictionHeadTransform(\n",
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+ " (transform_act_fn): GELUActivation()\n",
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+ " )\n",
+ " (decoder): Linear(in_features=768, out_features=128000, bias=True)\n",
+ " )\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "from transformers import AutoTokenizer, BertForMaskedLM\n",
+    "\n",
+    "CHECKPOINT = 'sivan22/halacha-siman-seif-classifier'\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)\n",
+    "# NOTE(review): loading this checkpoint as BertForMaskedLM reports the whole\n",
+    "# `cls.predictions.*` head as newly initialized, i.e. the MLM head is random.\n",
+    "# Presumably the classifier is (re)loaded later with a sequence-classification\n",
+    "# class - confirm, or load it that way here.\n",
+    "model = BertForMaskedLM.from_pretrained(CHECKPOINT)\n",
+    "\n",
+    "# for evaluation, disable dropout\n",
+    "# (the trailing ';' keeps the multi-hundred-line module repr out of the cell output)\n",
+    "model.eval();\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:50.227267Z",
+ "iopub.status.busy": "2023-12-05T04:04:50.226896Z",
+ "iopub.status.idle": "2023-12-05T04:04:50.234283Z",
+ "shell.execute_reply": "2023-12-05T04:04:50.233090Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:50.227228Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "def preprocess_function(examples):\n",
+    "    \"\"\"Tokenize the ``text`` field of ``examples`` with truncation enabled.\n",
+    "\n",
+    "    Returns whatever the module-level ``tokenizer`` produces for that input.\n",
+    "    \"\"\"\n",
+    "    texts = examples[\"text\"]\n",
+    "    encoded = tokenizer(texts, truncation=True)\n",
+    "    return encoded"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:50.239168Z",
+ "iopub.status.busy": "2023-12-05T04:04:50.238822Z",
+ "iopub.status.idle": "2023-12-05T04:04:56.687632Z",
+ "shell.execute_reply": "2023-12-05T04:04:56.686783Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:50.239132Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "87a846cc5c7d4f83a906028b968fddce",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/38347 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "93832654760e4db9af952da3a3508fbc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/4261 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# Tokenize every split, passing rows to `preprocess_function` in batches.\n",
+    "tokenized_dataset = dataset.map(function=preprocess_function, batched=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:04:56.691513Z",
+ "iopub.status.busy": "2023-12-05T04:04:56.691282Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.006936Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.005921Z",
+ "shell.execute_reply.started": "2023-12-05T04:04:56.691488Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "from transformers import DataCollatorWithPadding\n",
+    "\n",
+    "# Padding is applied per batch at collate time, using the tokenizer loaded above.\n",
+    "data_collator = DataCollatorWithPadding(tokenizer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.018406Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.018113Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.376438Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.375571Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.018381Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c84a83d0fe544e48aa4b4d0d800e82dd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading builder script: 0%| | 0.00/4.20k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "import evaluate\n",
+    "\n",
+    "# Accuracy metric object; `compute_metrics` calls its `.compute(...)`.\n",
+    "accuracy = evaluate.load(path=\"accuracy\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.379433Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.379203Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.383938Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.383118Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.379408Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "def compute_metrics(eval_pred):\n",
+    "    \"\"\"Score an evaluation pass with the module-level ``accuracy`` metric.\n",
+    "\n",
+    "    ``eval_pred`` unpacks into ``(scores, labels)``. The predicted class for each\n",
+    "    row is the index of its largest score along axis 1; the result of\n",
+    "    ``accuracy.compute`` is returned unchanged.\n",
+    "    \"\"\"\n",
+    "    scores, references = eval_pred\n",
+    "    predicted_ids = np.argmax(scores, axis=1)\n",
+    "    return accuracy.compute(references=references, predictions=predicted_ids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.386997Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.386764Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.390842Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.390039Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.386974Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Drop every missing (None) entry so that None can never end up as a label.\n",
+ "# list.remove() deletes only the first match, so repeated Nones would survive it;\n",
+ "# filtering removes them all and is a no-op when there are none.\n",
+ "# Slice assignment keeps the same list object, so other references stay in sync.\n",
+ "simanim[:] = [siman for siman in simanim if siman is not None]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.393569Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.393283Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.410788Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.410140Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.393546Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['א א',\n",
+ " 'א ב',\n",
+ " 'א ג',\n",
+ " 'א ד',\n",
+ " 'א ה',\n",
+ " 'א ו',\n",
+ " 'א ז',\n",
+ " 'א ח',\n",
+ " 'א ט',\n",
+ " 'א א',\n",
+ " 'א ב',\n",
+ " 'א ג',\n",
+ " 'א ד',\n",
+ " 'א ה',\n",
+ " 'א ו',\n",
+ " 'א ז',\n",
+ " 'א ח',\n",
+ " 'א ט',\n",
+ " 'ב א',\n",
+ " 'ב ב',\n",
+ " 'ב ג',\n",
+ " 'ב ד',\n",
+ " 'ב ה',\n",
+ " 'ב ו',\n",
+ " 'ב א',\n",
+ " 'ב ב',\n",
+ " 'ב ג',\n",
+ " 'ב ד',\n",
+ " 'ב ו',\n",
+ " 'ג א',\n",
+ " 'ג ב',\n",
+ " 'ג ג',\n",
+ " 'ג ד',\n",
+ " 'ג ה',\n",
+ " 'ג ו',\n",
+ " 'ג ז',\n",
+ " 'ג ח',\n",
+ " 'ג ט',\n",
+ " 'ג טו',\n",
+ " 'ג טז',\n",
+ " 'ג י',\n",
+ " 'ג יא',\n",
+ " 'ג יב',\n",
+ " 'ג יג',\n",
+ " 'ג יד',\n",
+ " 'ג יז',\n",
+ " 'ג א',\n",
+ " 'ג ב',\n",
+ " 'ג ג',\n",
+ " 'ג ד',\n",
+ " 'ג ה',\n",
+ " 'ג ו',\n",
+ " 'ג ז',\n",
+ " 'ג ח',\n",
+ " 'ג ט',\n",
+ " 'ג טז',\n",
+ " 'ג י',\n",
+ " 'ג יא',\n",
+ " 'ג יב',\n",
+ " 'ג יג',\n",
+ " 'ג יד',\n",
+ " 'ג יז',\n",
+ " 'ד א',\n",
+ " 'ד ב',\n",
+ " 'ד ג',\n",
+ " 'ד ד',\n",
+ " 'ד ה',\n",
+ " 'ד ו',\n",
+ " 'ד ז',\n",
+ " 'ד ח',\n",
+ " 'ד ט',\n",
+ " 'ד טו',\n",
+ " 'ד טז',\n",
+ " 'ד י',\n",
+ " 'ד יא',\n",
+ " 'ד יב',\n",
+ " 'ד יג',\n",
+ " 'ד יד',\n",
+ " 'ד יז',\n",
+ " 'ד יח',\n",
+ " 'ד יט',\n",
+ " 'ד כ',\n",
+ " 'ד כא',\n",
+ " 'ד כב',\n",
+ " 'ד כג',\n",
+ " 'ד א',\n",
+ " 'ד ב',\n",
+ " 'ד ד',\n",
+ " 'ד ז',\n",
+ " 'ד ח',\n",
+ " 'ד טו',\n",
+ " 'ד טז',\n",
+ " 'ד י',\n",
+ " 'ד יא',\n",
+ " 'ד יב',\n",
+ " 'ד יג',\n",
+ " 'ד יד',\n",
+ " 'ד יח',\n",
+ " 'ד כא',\n",
+ " 'ד כב',\n",
+ " 'ד כג',\n",
+ " 'ה א',\n",
+ " 'ה א',\n",
+ " 'ו א',\n",
+ " 'ו ב',\n",
+ " 'ו ג',\n",
+ " 'ו ד',\n",
+ " 'ו א',\n",
+ " 'ו ב',\n",
+ " 'ו ג',\n",
+ " 'ו ד',\n",
+ " 'ז א',\n",
+ " 'ז ב',\n",
+ " 'ז ג',\n",
+ " 'ז ד',\n",
+ " 'ז א',\n",
+ " 'ז ב',\n",
+ " 'ז ג',\n",
+ " 'ח א',\n",
+ " 'ח ב',\n",
+ " 'ח ג',\n",
+ " 'ח ד',\n",
+ " 'ח ה',\n",
+ " 'ח ו',\n",
+ " 'ח ז',\n",
+ " 'ח ח',\n",
+ " 'ח ט',\n",
+ " 'ח טו',\n",
+ " 'ח טז',\n",
+ " 'ח י',\n",
+ " 'ח יא',\n",
+ " 'ח יב',\n",
+ " 'ח יג',\n",
+ " 'ח יד',\n",
+ " 'ח יז',\n",
+ " 'ח א',\n",
+ " 'ח ב',\n",
+ " 'ח ג',\n",
+ " 'ח ד',\n",
+ " 'ח ה',\n",
+ " 'ח ו',\n",
+ " 'ח ז',\n",
+ " 'ח ט',\n",
+ " 'ח טו',\n",
+ " 'ח טז',\n",
+ " 'ח י',\n",
+ " 'ח יא',\n",
+ " 'ח יב',\n",
+ " 'ח יג',\n",
+ " 'ח יד',\n",
+ " 'ח יז',\n",
+ " 'ט א',\n",
+ " 'ט ב',\n",
+ " 'ט ג',\n",
+ " 'ט ד',\n",
+ " 'ט ה',\n",
+ " 'ט ו',\n",
+ " 'ט א',\n",
+ " 'ט ב',\n",
+ " 'ט ג',\n",
+ " 'ט ד',\n",
+ " 'ט ה',\n",
+ " 'ט ו',\n",
+ " 'טו א',\n",
+ " 'טו ב',\n",
+ " 'טו ג',\n",
+ " 'טו ד',\n",
+ " 'טו ה',\n",
+ " 'טו ו',\n",
+ " 'טו א',\n",
+ " 'טו ב',\n",
+ " 'טו ג',\n",
+ " 'טו ד',\n",
+ " 'טו ה',\n",
+ " 'טו ו',\n",
+ " 'טז א',\n",
+ " 'טז א',\n",
+ " 'י א',\n",
+ " 'י ב',\n",
+ " 'י ג',\n",
+ " 'י ד',\n",
+ " 'י ה',\n",
+ " 'י ו',\n",
+ " 'י ז',\n",
+ " 'י ח',\n",
+ " 'י ט',\n",
+ " 'י י',\n",
+ " 'י יא',\n",
+ " 'י יב',\n",
+ " 'י א',\n",
+ " 'י ב',\n",
+ " 'י ג',\n",
+ " 'י ד',\n",
+ " 'י ה',\n",
+ " 'י ו',\n",
+ " 'י ז',\n",
+ " 'י ח',\n",
+ " 'י יא',\n",
+ " 'י יב',\n",
+ " 'יא א',\n",
+ " 'יא ב',\n",
+ " 'יא ג',\n",
+ " 'יא ד',\n",
+ " 'יא ה',\n",
+ " 'יא ו',\n",
+ " 'יא ז',\n",
+ " 'יא ח',\n",
+ " 'יא ט',\n",
+ " 'יא טו',\n",
+ " 'יא י',\n",
+ " 'יא יא',\n",
+ " 'יא יב',\n",
+ " 'יא יג',\n",
+ " 'יא יד',\n",
+ " 'יא א',\n",
+ " 'יא ב',\n",
+ " 'יא ג',\n",
+ " 'יא ד',\n",
+ " 'יא ה',\n",
+ " 'יא ו',\n",
+ " 'יא ז',\n",
+ " 'יא ח',\n",
+ " 'יא ט',\n",
+ " 'יא טו',\n",
+ " 'יא י',\n",
+ " 'יא יא',\n",
+ " 'יא יב',\n",
+ " 'יא יג',\n",
+ " 'יא יד',\n",
+ " 'יב א',\n",
+ " 'יב ב',\n",
+ " 'יב ג',\n",
+ " 'יב א',\n",
+ " 'יב ב',\n",
+ " 'יב ג',\n",
+ " 'יג א',\n",
+ " 'יג ב',\n",
+ " 'יג ג',\n",
+ " 'יג א',\n",
+ " 'יג ב',\n",
+ " 'יג ג',\n",
+ " 'יד א',\n",
+ " 'יד ב',\n",
+ " 'יד ג',\n",
+ " 'יד ד',\n",
+ " 'יד ה',\n",
+ " 'יד א',\n",
+ " 'יד ב',\n",
+ " 'יד ג',\n",
+ " 'יד ד',\n",
+ " 'יז א',\n",
+ " 'יז ב',\n",
+ " 'יז ג',\n",
+ " 'יז א',\n",
+ " 'יז ב',\n",
+ " 'יז ג',\n",
+ " 'יח א',\n",
+ " 'יח ב',\n",
+ " 'יח ג',\n",
+ " 'יח א',\n",
+ " 'יח ב',\n",
+ " 'יח ג',\n",
+ " 'יט א',\n",
+ " 'יט ב',\n",
+ " 'יט א',\n",
+ " 'יט ב',\n",
+ " 'כ א',\n",
+ " 'כ ב',\n",
+ " 'כ א',\n",
+ " 'כ ב',\n",
+ " 'כא א',\n",
+ " 'כא ב',\n",
+ " 'כא ג',\n",
+ " 'כא ד',\n",
+ " 'כא א',\n",
+ " 'כא ג',\n",
+ " 'כא ד',\n",
+ " 'כב א',\n",
+ " 'כב א',\n",
+ " 'כג א',\n",
+ " 'כג ב',\n",
+ " 'כג ג',\n",
+ " 'כג ד',\n",
+ " 'כג א',\n",
+ " 'כג ב',\n",
+ " 'כג ג',\n",
+ " 'כג ד',\n",
+ " 'כד א',\n",
+ " 'כד ב',\n",
+ " 'כד ג',\n",
+ " 'כד ד',\n",
+ " 'כד ה',\n",
+ " 'כד ו',\n",
+ " 'כד א',\n",
+ " 'כד ב',\n",
+ " 'כד ד',\n",
+ " 'כד ה',\n",
+ " 'כד ו',\n",
+ " 'כה א',\n",
+ " 'כה ב',\n",
+ " 'כה ג',\n",
+ " 'כה ד',\n",
+ " 'כה ה',\n",
+ " 'כה ו',\n",
+ " 'כה ז',\n",
+ " 'כה ח',\n",
+ " 'כה ט',\n",
+ " 'כה י',\n",
+ " 'כה יא',\n",
+ " 'כה יב',\n",
+ " 'כה יג',\n",
+ " 'כה א',\n",
+ " 'כה ב',\n",
+ " 'כה ג',\n",
+ " 'כה ה',\n",
+ " 'כה ו',\n",
+ " 'כה ז',\n",
+ " 'כה ח',\n",
+ " 'כה ט',\n",
+ " 'כה י',\n",
+ " 'כה יא',\n",
+ " 'כה יב',\n",
+ " 'כה יג',\n",
+ " 'כו א',\n",
+ " 'כו ב',\n",
+ " 'כו א',\n",
+ " 'כז א',\n",
+ " 'כז ב',\n",
+ " 'כז ג',\n",
+ " 'כז ד',\n",
+ " 'כז ה',\n",
+ " 'כז ו',\n",
+ " 'כז ז',\n",
+ " 'כז ח',\n",
+ " 'כז ט',\n",
+ " 'כז י',\n",
+ " 'כז יא',\n",
+ " 'כז א',\n",
+ " 'כז ב',\n",
+ " 'כז ד',\n",
+ " 'כז ה',\n",
+ " 'כז ו',\n",
+ " 'כז ז',\n",
+ " 'כז ח',\n",
+ " 'כז ט',\n",
+ " 'כז י',\n",
+ " 'כז יא',\n",
+ " 'כח א',\n",
+ " 'כח ב',\n",
+ " 'כח ג',\n",
+ " 'כח א',\n",
+ " 'כח ב',\n",
+ " 'כט א',\n",
+ " 'כט א',\n",
+ " 'ל א',\n",
+ " 'ל ב',\n",
+ " 'ל ג',\n",
+ " 'ל ד',\n",
+ " 'ל ה',\n",
+ " 'ל א',\n",
+ " 'ל ב',\n",
+ " 'ל ג',\n",
+ " 'ל ד',\n",
+ " 'ל ה',\n",
+ " 'לא א',\n",
+ " 'לא ב',\n",
+ " 'לא א',\n",
+ " 'לא ב',\n",
+ " 'לב א',\n",
+ " 'לב ב',\n",
+ " 'לב ג',\n",
+ " 'לב ד',\n",
+ " 'לב ה',\n",
+ " 'לב ו',\n",
+ " 'לב ז',\n",
+ " 'לב ח',\n",
+ " 'לב ט',\n",
+ " 'לב טו',\n",
+ " 'לב טז',\n",
+ " 'לב י',\n",
+ " 'לב יא',\n",
+ " 'לב יב',\n",
+ " 'לב יג',\n",
+ " 'לב יד',\n",
+ " 'לב יז',\n",
+ " 'לב יח',\n",
+ " 'לב יט',\n",
+ " 'לב כ',\n",
+ " 'לב כא',\n",
+ " 'לב כב',\n",
+ " 'לב כג',\n",
+ " 'לב כד',\n",
+ " 'לב כה',\n",
+ " 'לב כו',\n",
+ " 'לב כז',\n",
+ " 'לב כח',\n",
+ " 'לב כט',\n",
+ " 'לב ל',\n",
+ " 'לב לא',\n",
+ " 'לב לב',\n",
+ " 'לב לג',\n",
+ " 'לב לד',\n",
+ " 'לב לה',\n",
+ " 'לב לו',\n",
+ " 'לב לז',\n",
+ " 'לב לח',\n",
+ " 'לב לט',\n",
+ " 'לב מ',\n",
+ " 'לב מא',\n",
+ " 'לב מב',\n",
+ " 'לב מג',\n",
+ " 'לב מד',\n",
+ " 'לב מה',\n",
+ " 'לב מו',\n",
+ " 'לב מז',\n",
+ " 'לב מח',\n",
+ " 'לב מט',\n",
+ " 'לב נ',\n",
+ " 'לב נא',\n",
+ " 'לב נב',\n",
+ " 'לב א',\n",
+ " 'לב ב',\n",
+ " 'לב ג',\n",
+ " 'לב ד',\n",
+ " 'לב ה',\n",
+ " 'לב ו',\n",
+ " 'לב ז',\n",
+ " 'לב ח',\n",
+ " 'לב ט',\n",
+ " 'לב טו',\n",
+ " 'לב טז',\n",
+ " 'לב י',\n",
+ " 'לב יא',\n",
+ " 'לב יב',\n",
+ " 'לב יג',\n",
+ " 'לב יד',\n",
+ " 'לב יז',\n",
+ " 'לב יח',\n",
+ " 'לב יט',\n",
+ " 'לב כ',\n",
+ " 'לב כג',\n",
+ " 'לב כד',\n",
+ " 'לב כה',\n",
+ " 'לב כז',\n",
+ " 'לב כח',\n",
+ " 'לב כט',\n",
+ " 'לב לא',\n",
+ " 'לב לב',\n",
+ " 'לב לג',\n",
+ " 'לב לה',\n",
+ " 'לב לו',\n",
+ " 'לב לז',\n",
+ " 'לב לח',\n",
+ " 'לב לט',\n",
+ " 'לב מ',\n",
+ " 'לב מא',\n",
+ " 'לב מב',\n",
+ " 'לב מד',\n",
+ " 'לב מה',\n",
+ " 'לב מז',\n",
+ " 'לב מט',\n",
+ " 'לב נ',\n",
+ " 'לב נא',\n",
+ " 'לב נב',\n",
+ " 'לג א',\n",
+ " 'לג ב',\n",
+ " 'לג ג',\n",
+ " 'לג ד',\n",
+ " 'לג ה',\n",
+ " 'לג א',\n",
+ " 'לג ג',\n",
+ " 'לג ד',\n",
+ " 'לג ה',\n",
+ " 'לד א',\n",
+ " 'לד ב',\n",
+ " 'לד ג',\n",
+ " 'לד ד',\n",
+ " 'לד א',\n",
+ " 'לד ב',\n",
+ " 'לד ג',\n",
+ " 'לד ד',\n",
+ " 'לה א',\n",
+ " 'לה ד',\n",
+ " 'לה א',\n",
+ " 'לו א',\n",
+ " 'לו ב',\n",
+ " 'לו ג',\n",
+ " 'לו א',\n",
+ " 'לו ב',\n",
+ " 'לו ג',\n",
+ " 'לז א',\n",
+ " 'לז ב',\n",
+ " 'לז ג',\n",
+ " 'לז א',\n",
+ " 'לז ב',\n",
+ " 'לז ג',\n",
+ " 'לח א',\n",
+ " 'לח ב',\n",
+ " 'לח ג',\n",
+ " 'לח ד',\n",
+ " 'לח ה',\n",
+ " 'לח ו',\n",
+ " 'לח ז',\n",
+ " 'לח ח',\n",
+ " 'לח ט',\n",
+ " 'לח י',\n",
+ " 'לח יא',\n",
+ " 'לח יב',\n",
+ " 'לח יג',\n",
+ " 'לח א',\n",
+ " 'לח ב',\n",
+ " 'לח ג',\n",
+ " 'לח ד',\n",
+ " 'לח ה',\n",
+ " 'לח ז',\n",
+ " 'לח ח',\n",
+ " 'לח ט',\n",
+ " 'לח י',\n",
+ " 'לח יא',\n",
+ " 'לח יב',\n",
+ " 'לט א',\n",
+ " 'לט ב',\n",
+ " 'לט ג',\n",
+ " 'לט ד',\n",
+ " 'לט ה',\n",
+ " 'לט ו',\n",
+ " 'לט ז',\n",
+ " 'לט ח',\n",
+ " 'לט ט',\n",
+ " 'לט י',\n",
+ " 'לט א',\n",
+ " 'לט ב',\n",
+ " 'לט ג',\n",
+ " 'לט ד',\n",
+ " 'לט ו',\n",
+ " 'לט ז',\n",
+ " 'לט ח',\n",
+ " 'לט ט',\n",
+ " 'לט י',\n",
+ " 'מ א',\n",
+ " 'מ ב',\n",
+ " 'מ ג',\n",
+ " 'מ ד',\n",
+ " 'מ ה',\n",
+ " 'מ ו',\n",
+ " 'מ ז',\n",
+ " 'מ ח',\n",
+ " 'מ א',\n",
+ " 'מ ב',\n",
+ " 'מ ג',\n",
+ " 'מ ד',\n",
+ " 'מ ו',\n",
+ " 'מ ז',\n",
+ " 'מא א',\n",
+ " 'מא א',\n",
+ " 'מב א',\n",
+ " 'מב ב',\n",
+ " 'מב ג',\n",
+ " 'מב א',\n",
+ " 'מב ג',\n",
+ " 'מג א',\n",
+ " 'מג ב',\n",
+ " 'מג ג',\n",
+ " 'מג ד',\n",
+ " 'מג ה',\n",
+ " 'מג ו',\n",
+ " 'מג ז',\n",
+ " 'מג ח',\n",
+ " 'מג ט',\n",
+ " 'מג א',\n",
+ " 'מג ב',\n",
+ " 'מג ג',\n",
+ " 'מג ד',\n",
+ " 'מג ה',\n",
+ " 'מג ו',\n",
+ " 'מג ז',\n",
+ " 'מג ח',\n",
+ " 'מד א',\n",
+ " 'מד א',\n",
+ " 'מה א',\n",
+ " 'מה ב',\n",
+ " 'מה א',\n",
+ " 'מה ב',\n",
+ " 'מו א',\n",
+ " 'מו ב',\n",
+ " 'מו ג',\n",
+ " 'מו ד',\n",
+ " 'מו ה',\n",
+ " 'מו ו',\n",
+ " 'מו ז',\n",
+ " 'מו ח',\n",
+ " 'מו ט',\n",
+ " 'מו א',\n",
+ " 'מו ב',\n",
+ " 'מו ג',\n",
+ " 'מו ד',\n",
+ " 'מו ה',\n",
+ " 'מו ו',\n",
+ " 'מו ז',\n",
+ " 'מו ח',\n",
+ " 'מו ט',\n",
+ " 'מז א',\n",
+ " 'מז ב',\n",
+ " 'מז ג',\n",
+ " 'מז ד',\n",
+ " 'מז ה',\n",
+ " 'מז ו',\n",
+ " 'מז ז',\n",
+ " 'מז ח',\n",
+ " 'מז ט',\n",
+ " 'מז י',\n",
+ " 'מז יא',\n",
+ " 'מז יב',\n",
+ " 'מז יג',\n",
+ " 'מז יד',\n",
+ " 'מז א',\n",
+ " 'מז ג',\n",
+ " 'מז ד',\n",
+ " 'מז ה',\n",
+ " 'מז ו',\n",
+ " 'מז ז',\n",
+ " 'מז ח',\n",
+ " 'מז ט',\n",
+ " 'מז י',\n",
+ " 'מז יא',\n",
+ " 'מז יב',\n",
+ " 'מז יג',\n",
+ " 'מז יד',\n",
+ " 'מח *',\n",
+ " 'מח א',\n",
+ " 'מח יד',\n",
+ " 'מח א',\n",
+ " 'מט א',\n",
+ " 'מט א',\n",
+ " 'נ א',\n",
+ " 'נ א',\n",
+ " 'נא א',\n",
+ " 'נא ב',\n",
+ " 'נא ג',\n",
+ " 'נא ד',\n",
+ " 'נא ה',\n",
+ " 'נא ו',\n",
+ " 'נא ז',\n",
+ " 'נא ח',\n",
+ " 'נא ט',\n",
+ " 'נא א',\n",
+ " 'נא ב',\n",
+ " 'נא ג',\n",
+ " 'נא ד',\n",
+ " 'נא ה',\n",
+ " 'נא ו',\n",
+ " 'נא ז',\n",
+ " 'נא ט',\n",
+ " 'נב א',\n",
+ " 'נב א',\n",
+ " 'נג א',\n",
+ " 'נג ב',\n",
+ " 'נג ג',\n",
+ " 'נג ד',\n",
+ " 'נג ה',\n",
+ " 'נג ו',\n",
+ " 'נג ז',\n",
+ " 'נג ח',\n",
+ " 'נג ט',\n",
+ " 'נג טו',\n",
+ " 'נג טז',\n",
+ " 'נג י',\n",
+ " 'נג יא',\n",
+ " 'נג יב',\n",
+ " 'נג יג',\n",
+ " 'נג יד',\n",
+ " 'נג יז',\n",
+ " 'נג יח',\n",
+ " 'נג יט',\n",
+ " 'נג כ',\n",
+ " 'נג כא',\n",
+ " 'נג כב',\n",
+ " 'נג כג',\n",
+ " 'נג כד',\n",
+ " 'נג כה',\n",
+ " 'נג כו',\n",
+ " 'נג א',\n",
+ " 'נג ב',\n",
+ " 'נג ג',\n",
+ " 'נג ד',\n",
+ " 'נג ה',\n",
+ " 'נג ו',\n",
+ " 'נג ט',\n",
+ " 'נג טז',\n",
+ " 'נג י',\n",
+ " 'נג יא',\n",
+ " 'נג יב',\n",
+ " 'נג יד',\n",
+ " 'נג יח',\n",
+ " 'נג יט',\n",
+ " 'נג כ',\n",
+ " 'נג כא',\n",
+ " 'נג כב',\n",
+ " 'נג כג',\n",
+ " 'נג כה',\n",
+ " 'נג כו',\n",
+ " 'נד א',\n",
+ " 'נד ב',\n",
+ " 'נד ג',\n",
+ " 'נד א',\n",
+ " 'נד ב',\n",
+ " 'נד ג',\n",
+ " 'נה א',\n",
+ " 'נה ב',\n",
+ " 'נה ג',\n",
+ " 'נה ד',\n",
+ " 'נה ה',\n",
+ " 'נה ו',\n",
+ " 'נה ז',\n",
+ " 'נה ח',\n",
+ " 'נה ט',\n",
+ " 'נה טו',\n",
+ " 'נה טז',\n",
+ " 'נה י',\n",
+ " 'נה יא',\n",
+ " 'נה יב',\n",
+ " 'נה יג',\n",
+ " 'נה יד',\n",
+ " 'נה יז',\n",
+ " 'נה יח',\n",
+ " 'נה יט',\n",
+ " 'נה כ',\n",
+ " 'נה כא',\n",
+ " 'נה כב',\n",
+ " 'נה א',\n",
+ " 'נה ב',\n",
+ " 'נה ג',\n",
+ " 'נה ד',\n",
+ " 'נה ה',\n",
+ " 'נה ו',\n",
+ " 'נה ז',\n",
+ " 'נה טז',\n",
+ " 'נה י',\n",
+ " 'נה יג',\n",
+ " 'נה יד',\n",
+ " 'נה יט',\n",
+ " 'נה כ',\n",
+ " 'נה כא',\n",
+ " 'נה כב',\n",
+ " 'נו א',\n",
+ " 'נו ב',\n",
+ " 'נו ג',\n",
+ " 'נו ד',\n",
+ " 'נו ה',\n",
+ " 'נו א',\n",
+ " 'נו ב',\n",
+ " 'נו ג',\n",
+ " 'נז א',\n",
+ " 'נז ב',\n",
+ " 'נז א',\n",
+ " 'נח א',\n",
+ " 'נח ב',\n",
+ " 'נח ג',\n",
+ " 'נח ד',\n",
+ " 'נח ה',\n",
+ " 'נח ו',\n",
+ " 'נח ז',\n",
+ " 'נח א',\n",
+ " 'נח ב',\n",
+ " 'נח ג',\n",
+ " 'נח ד',\n",
+ " 'נח ה',\n",
+ " 'נח ו',\n",
+ " 'נח ז',\n",
+ " 'נט א',\n",
+ " 'נט ב',\n",
+ " 'נט ג',\n",
+ " 'נט ד',\n",
+ " 'נט ה',\n",
+ " 'נט א',\n",
+ " 'נט ב',\n",
+ " 'נט ג',\n",
+ " 'נט ד',\n",
+ " 'נט ה',\n",
+ " 'ס א',\n",
+ " 'ס ב',\n",
+ " 'ס ג',\n",
+ " 'ס ד',\n",
+ " 'ס ה',\n",
+ " 'ס א',\n",
+ " 'ס ב',\n",
+ " 'ס ג',\n",
+ " 'ס ד',\n",
+ " 'ס ה',\n",
+ " 'סא א',\n",
+ " 'סא ב',\n",
+ " 'סא ג',\n",
+ " 'סא ד',\n",
+ " 'סא ה',\n",
+ " 'סא ו',\n",
+ " 'סא ז',\n",
+ " 'סא ח',\n",
+ " 'סא ט',\n",
+ " 'סא טו',\n",
+ " 'סא טז',\n",
+ " 'סא י',\n",
+ " 'סא יא',\n",
+ " 'סא יב',\n",
+ " 'סא יג',\n",
+ " 'סא יד',\n",
+ " 'סא יז',\n",
+ " 'סא יח',\n",
+ " 'סא יט',\n",
+ " 'סא כ',\n",
+ " 'סא כא',\n",
+ " 'סא כב',\n",
+ " 'סא כג',\n",
+ " 'סא כד',\n",
+ " 'סא כה',\n",
+ " 'סא כו',\n",
+ " 'סא א',\n",
+ " 'סא ג',\n",
+ " 'סא ה',\n",
+ " 'סא ו',\n",
+ " 'סא ז',\n",
+ " 'סא ט',\n",
+ " 'סא י',\n",
+ " 'סא יא',\n",
+ " 'סא יב',\n",
+ " 'סא יג',\n",
+ " 'סא יד',\n",
+ " 'סא כ',\n",
+ " 'סא כא',\n",
+ " 'סא כג',\n",
+ " 'סא כד',\n",
+ " 'סא כה',\n",
+ " 'סא כו',\n",
+ " 'סב א',\n",
+ " 'סב ב',\n",
+ " 'סב ג',\n",
+ " 'סב ד',\n",
+ " 'סב ה',\n",
+ " 'סב א',\n",
+ " 'סב ב',\n",
+ " 'סב ד',\n",
+ " 'סב ה',\n",
+ " 'סג א',\n",
+ " 'סג ב',\n",
+ " 'סג ג',\n",
+ " 'סג ד',\n",
+ " 'סג ה',\n",
+ " 'סג ו',\n",
+ " 'סג ז',\n",
+ " 'סג ח',\n",
+ " 'סג ט',\n",
+ " 'סג א',\n",
+ " 'סג ב',\n",
+ " 'סג ג',\n",
+ " 'סג ד',\n",
+ " 'סג ה',\n",
+ " 'סג ו',\n",
+ " 'סד א',\n",
+ " 'סד ב',\n",
+ " 'סד ג',\n",
+ " 'סד ד',\n",
+ " 'סד ב',\n",
+ " 'סד ד',\n",
+ " 'סה א',\n",
+ " 'סה ב',\n",
+ " 'סה ג',\n",
+ " 'סה א',\n",
+ " 'סה ב',\n",
+ " 'סו א',\n",
+ " 'סו ב',\n",
+ " 'סו ג',\n",
+ " 'סו ד',\n",
+ " 'סו ה',\n",
+ " 'סו ו',\n",
+ " 'סו ז',\n",
+ " 'סו ח',\n",
+ " 'סו ט',\n",
+ " 'סו י',\n",
+ " 'סו א',\n",
+ " 'סו ב',\n",
+ " 'סו ג',\n",
+ " 'סו ד',\n",
+ " 'סו ה',\n",
+ " 'סו ז',\n",
+ " 'סו ח',\n",
+ " 'סו י',\n",
+ " 'סז א',\n",
+ " 'סז א',\n",
+ " 'סח א',\n",
+ " 'סח א',\n",
+ " 'סט א',\n",
+ " 'סט ב',\n",
+ " 'סט א',\n",
+ " 'סט ב',\n",
+ " 'ע א',\n",
+ " 'ע ב',\n",
+ " 'ע ג',\n",
+ " 'ע ד',\n",
+ " 'ע ה',\n",
+ " 'ע א',\n",
+ " 'ע ג',\n",
+ " 'ע ד',\n",
+ " 'ע ה',\n",
+ " 'עא א',\n",
+ " 'עא ב',\n",
+ " 'עא ג',\n",
+ " 'עא ד',\n",
+ " 'עא ה',\n",
+ " 'עא ו',\n",
+ " 'עא ז',\n",
+ " 'עא א',\n",
+ " 'עא ב',\n",
+ " 'עא ז',\n",
+ " 'עב א',\n",
+ " 'עב ב',\n",
+ " 'עב ג',\n",
+ " 'עב ד',\n",
+ " 'עב ה',\n",
+ " 'עב א',\n",
+ " 'עב ב',\n",
+ " 'עב ג',\n",
+ " 'עב ד',\n",
+ " 'עב ה',\n",
+ " 'עג א',\n",
+ " 'עג ב',\n",
+ " 'עג ג',\n",
+ " 'עג ד',\n",
+ " 'עג א',\n",
+ " 'עג ב',\n",
+ " 'עג ג',\n",
+ " 'עג ד',\n",
+ " 'עד א',\n",
+ " 'עד ב',\n",
+ " 'עד ג',\n",
+ " 'עד ד',\n",
+ " 'עד ה',\n",
+ " 'עד ו',\n",
+ " 'עד א',\n",
+ " 'עד ב',\n",
+ " 'עד ג',\n",
+ " 'עד ד',\n",
+ " 'עד ה',\n",
+ " 'עד ו',\n",
+ " 'עה א',\n",
+ " 'עה ב',\n",
+ " 'עה ג',\n",
+ " 'עה ד',\n",
+ " 'עה ה',\n",
+ " 'עה ו',\n",
+ " 'עה א',\n",
+ " 'עה ב',\n",
+ " 'עה ג',\n",
+ " 'עה ד',\n",
+ " 'עה ו',\n",
+ " 'עו א',\n",
+ " 'עו ב',\n",
+ " 'עו ג',\n",
+ " 'עו ד',\n",
+ " 'עו ה',\n",
+ " 'עו ו',\n",
+ " 'עו ז',\n",
+ " 'עו ח',\n",
+ " 'עו ב',\n",
+ " 'עו ג',\n",
+ " 'עו ד',\n",
+ " 'עו ה',\n",
+ " 'עו ו',\n",
+ " 'עו ז',\n",
+ " 'עו ח',\n",
+ " 'עז א',\n",
+ " 'עז ב',\n",
+ " 'עז ב',\n",
+ " 'עח א',\n",
+ " 'עח א',\n",
+ " 'עט א',\n",
+ " 'עט ב',\n",
+ " 'עט ג',\n",
+ " 'עט ד',\n",
+ " 'עט ה',\n",
+ " 'עט ו',\n",
+ " 'עט ז',\n",
+ " 'עט ח',\n",
+ " 'עט ט',\n",
+ " 'עט א',\n",
+ " 'עט ב',\n",
+ " 'עט ג',\n",
+ " 'עט ה',\n",
+ " 'עט ו',\n",
+ " 'עט ז',\n",
+ " 'עט ט',\n",
+ " 'פ א',\n",
+ " 'פ א',\n",
+ " 'פא א',\n",
+ " 'פא ב',\n",
+ " 'פא א',\n",
+ " 'פא ב',\n",
+ " 'פב א',\n",
+ " 'פב ב',\n",
+ " 'פב א',\n",
+ " 'פב ב',\n",
+ " 'פג א',\n",
+ " ...]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "simanim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.413890Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.413643Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.421535Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.420532Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.413866Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Lookup tables that translate between integer class ids and siman label strings.\n",
+ "#\n",
+ "# `simanim` can hold the same label string more than once. Enumerating it directly\n",
+ "# gives a repeated label several ids while `label2id` keeps only the last one, so\n",
+ "# the two tables stop being inverses and ids are allocated that no label maps to.\n",
+ "# Collapse repeats first (dict.fromkeys keeps first-seen order) so that every label\n",
+ "# owns exactly one id and label2id[id2label[i]] == i for every id.\n",
+ "# NOTE(review): this renumbers the ids; confirm the dataset labels are encoded via `label2id`.\n",
+ "unique_simanim = list(dict.fromkeys(simanim))\n",
+ "id2label = dict(enumerate(unique_simanim))\n",
+ "label2id = {siman: idx for idx, siman in id2label.items()}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.425057Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.424829Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.450132Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.449341Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.425033Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{0: 'א א',\n",
+ " 1: 'א ב',\n",
+ " 2: 'א ג',\n",
+ " 3: 'א ד',\n",
+ " 4: 'א ה',\n",
+ " 5: 'א ו',\n",
+ " 6: 'א ז',\n",
+ " 7: 'א ח',\n",
+ " 8: 'א ט',\n",
+ " 9: 'א א',\n",
+ " 10: 'א ב',\n",
+ " 11: 'א ג',\n",
+ " 12: 'א ד',\n",
+ " 13: 'א ה',\n",
+ " 14: 'א ו',\n",
+ " 15: 'א ז',\n",
+ " 16: 'א ח',\n",
+ " 17: 'א ט',\n",
+ " 18: 'ב א',\n",
+ " 19: 'ב ב',\n",
+ " 20: 'ב ג',\n",
+ " 21: 'ב ד',\n",
+ " 22: 'ב ה',\n",
+ " 23: 'ב ו',\n",
+ " 24: 'ב א',\n",
+ " 25: 'ב ב',\n",
+ " 26: 'ב ג',\n",
+ " 27: 'ב ד',\n",
+ " 28: 'ב ו',\n",
+ " 29: 'ג א',\n",
+ " 30: 'ג ב',\n",
+ " 31: 'ג ג',\n",
+ " 32: 'ג ד',\n",
+ " 33: 'ג ה',\n",
+ " 34: 'ג ו',\n",
+ " 35: 'ג ז',\n",
+ " 36: 'ג ח',\n",
+ " 37: 'ג ט',\n",
+ " 38: 'ג טו',\n",
+ " 39: 'ג טז',\n",
+ " 40: 'ג י',\n",
+ " 41: 'ג יא',\n",
+ " 42: 'ג יב',\n",
+ " 43: 'ג יג',\n",
+ " 44: 'ג יד',\n",
+ " 45: 'ג יז',\n",
+ " 46: 'ג א',\n",
+ " 47: 'ג ב',\n",
+ " 48: 'ג ג',\n",
+ " 49: 'ג ד',\n",
+ " 50: 'ג ה',\n",
+ " 51: 'ג ו',\n",
+ " 52: 'ג ז',\n",
+ " 53: 'ג ח',\n",
+ " 54: 'ג ט',\n",
+ " 55: 'ג טז',\n",
+ " 56: 'ג י',\n",
+ " 57: 'ג יא',\n",
+ " 58: 'ג יב',\n",
+ " 59: 'ג יג',\n",
+ " 60: 'ג יד',\n",
+ " 61: 'ג יז',\n",
+ " 62: 'ד א',\n",
+ " 63: 'ד ב',\n",
+ " 64: 'ד ג',\n",
+ " 65: 'ד ד',\n",
+ " 66: 'ד ה',\n",
+ " 67: 'ד ו',\n",
+ " 68: 'ד ז',\n",
+ " 69: 'ד ח',\n",
+ " 70: 'ד ט',\n",
+ " 71: 'ד טו',\n",
+ " 72: 'ד טז',\n",
+ " 73: 'ד י',\n",
+ " 74: 'ד יא',\n",
+ " 75: 'ד יב',\n",
+ " 76: 'ד יג',\n",
+ " 77: 'ד יד',\n",
+ " 78: 'ד יז',\n",
+ " 79: 'ד יח',\n",
+ " 80: 'ד יט',\n",
+ " 81: 'ד כ',\n",
+ " 82: 'ד כא',\n",
+ " 83: 'ד כב',\n",
+ " 84: 'ד כג',\n",
+ " 85: 'ד א',\n",
+ " 86: 'ד ב',\n",
+ " 87: 'ד ד',\n",
+ " 88: 'ד ז',\n",
+ " 89: 'ד ח',\n",
+ " 90: 'ד טו',\n",
+ " 91: 'ד טז',\n",
+ " 92: 'ד י',\n",
+ " 93: 'ד יא',\n",
+ " 94: 'ד יב',\n",
+ " 95: 'ד יג',\n",
+ " 96: 'ד יד',\n",
+ " 97: 'ד יח',\n",
+ " 98: 'ד כא',\n",
+ " 99: 'ד כב',\n",
+ " 100: 'ד כג',\n",
+ " 101: 'ה א',\n",
+ " 102: 'ה א',\n",
+ " 103: 'ו א',\n",
+ " 104: 'ו ב',\n",
+ " 105: 'ו ג',\n",
+ " 106: 'ו ד',\n",
+ " 107: 'ו א',\n",
+ " 108: 'ו ב',\n",
+ " 109: 'ו ג',\n",
+ " 110: 'ו ד',\n",
+ " 111: 'ז א',\n",
+ " 112: 'ז ב',\n",
+ " 113: 'ז ג',\n",
+ " 114: 'ז ד',\n",
+ " 115: 'ז א',\n",
+ " 116: 'ז ב',\n",
+ " 117: 'ז ג',\n",
+ " 118: 'ח א',\n",
+ " 119: 'ח ב',\n",
+ " 120: 'ח ג',\n",
+ " 121: 'ח ד',\n",
+ " 122: 'ח ה',\n",
+ " 123: 'ח ו',\n",
+ " 124: 'ח ז',\n",
+ " 125: 'ח ח',\n",
+ " 126: 'ח ט',\n",
+ " 127: 'ח טו',\n",
+ " 128: 'ח טז',\n",
+ " 129: 'ח י',\n",
+ " 130: 'ח יא',\n",
+ " 131: 'ח יב',\n",
+ " 132: 'ח יג',\n",
+ " 133: 'ח יד',\n",
+ " 134: 'ח יז',\n",
+ " 135: 'ח א',\n",
+ " 136: 'ח ב',\n",
+ " 137: 'ח ג',\n",
+ " 138: 'ח ד',\n",
+ " 139: 'ח ה',\n",
+ " 140: 'ח ו',\n",
+ " 141: 'ח ז',\n",
+ " 142: 'ח ט',\n",
+ " 143: 'ח טו',\n",
+ " 144: 'ח טז',\n",
+ " 145: 'ח י',\n",
+ " 146: 'ח יא',\n",
+ " 147: 'ח יב',\n",
+ " 148: 'ח יג',\n",
+ " 149: 'ח יד',\n",
+ " 150: 'ח יז',\n",
+ " 151: 'ט א',\n",
+ " 152: 'ט ב',\n",
+ " 153: 'ט ג',\n",
+ " 154: 'ט ד',\n",
+ " 155: 'ט ה',\n",
+ " 156: 'ט ו',\n",
+ " 157: 'ט א',\n",
+ " 158: 'ט ב',\n",
+ " 159: 'ט ג',\n",
+ " 160: 'ט ד',\n",
+ " 161: 'ט ה',\n",
+ " 162: 'ט ו',\n",
+ " 163: 'טו א',\n",
+ " 164: 'טו ב',\n",
+ " 165: 'טו ג',\n",
+ " 166: 'טו ד',\n",
+ " 167: 'טו ה',\n",
+ " 168: 'טו ו',\n",
+ " 169: 'טו א',\n",
+ " 170: 'טו ב',\n",
+ " 171: 'טו ג',\n",
+ " 172: 'טו ד',\n",
+ " 173: 'טו ה',\n",
+ " 174: 'טו ו',\n",
+ " 175: 'טז א',\n",
+ " 176: 'טז א',\n",
+ " 177: 'י א',\n",
+ " 178: 'י ב',\n",
+ " 179: 'י ג',\n",
+ " 180: 'י ד',\n",
+ " 181: 'י ה',\n",
+ " 182: 'י ו',\n",
+ " 183: 'י ז',\n",
+ " 184: 'י ח',\n",
+ " 185: 'י ט',\n",
+ " 186: 'י י',\n",
+ " 187: 'י יא',\n",
+ " 188: 'י יב',\n",
+ " 189: 'י א',\n",
+ " 190: 'י ב',\n",
+ " 191: 'י ג',\n",
+ " 192: 'י ד',\n",
+ " 193: 'י ה',\n",
+ " 194: 'י ו',\n",
+ " 195: 'י ז',\n",
+ " 196: 'י ח',\n",
+ " 197: 'י יא',\n",
+ " 198: 'י יב',\n",
+ " 199: 'יא א',\n",
+ " 200: 'יא ב',\n",
+ " 201: 'יא ג',\n",
+ " 202: 'יא ד',\n",
+ " 203: 'יא ה',\n",
+ " 204: 'יא ו',\n",
+ " 205: 'יא ז',\n",
+ " 206: 'יא ח',\n",
+ " 207: 'יא ט',\n",
+ " 208: 'יא טו',\n",
+ " 209: 'יא י',\n",
+ " 210: 'יא יא',\n",
+ " 211: 'יא יב',\n",
+ " 212: 'יא יג',\n",
+ " 213: 'יא יד',\n",
+ " 214: 'יא א',\n",
+ " 215: 'יא ב',\n",
+ " 216: 'יא ג',\n",
+ " 217: 'יא ד',\n",
+ " 218: 'יא ה',\n",
+ " 219: 'יא ו',\n",
+ " 220: 'יא ז',\n",
+ " 221: 'יא ח',\n",
+ " 222: 'יא ט',\n",
+ " 223: 'יא טו',\n",
+ " 224: 'יא י',\n",
+ " 225: 'יא יא',\n",
+ " 226: 'יא יב',\n",
+ " 227: 'יא יג',\n",
+ " 228: 'יא יד',\n",
+ " 229: 'יב א',\n",
+ " 230: 'יב ב',\n",
+ " 231: 'יב ג',\n",
+ " 232: 'יב א',\n",
+ " 233: 'יב ב',\n",
+ " 234: 'יב ג',\n",
+ " 235: 'יג א',\n",
+ " 236: 'יג ב',\n",
+ " 237: 'יג ג',\n",
+ " 238: 'יג א',\n",
+ " 239: 'יג ב',\n",
+ " 240: 'יג ג',\n",
+ " 241: 'יד א',\n",
+ " 242: 'יד ב',\n",
+ " 243: 'יד ג',\n",
+ " 244: 'יד ד',\n",
+ " 245: 'יד ה',\n",
+ " 246: 'יד א',\n",
+ " 247: 'יד ב',\n",
+ " 248: 'יד ג',\n",
+ " 249: 'יד ד',\n",
+ " 250: 'יז א',\n",
+ " 251: 'יז ב',\n",
+ " 252: 'יז ג',\n",
+ " 253: 'יז א',\n",
+ " 254: 'יז ב',\n",
+ " 255: 'יז ג',\n",
+ " 256: 'יח א',\n",
+ " 257: 'יח ב',\n",
+ " 258: 'יח ג',\n",
+ " 259: 'יח א',\n",
+ " 260: 'יח ב',\n",
+ " 261: 'יח ג',\n",
+ " 262: 'יט א',\n",
+ " 263: 'יט ב',\n",
+ " 264: 'יט א',\n",
+ " 265: 'יט ב',\n",
+ " 266: 'כ א',\n",
+ " 267: 'כ ב',\n",
+ " 268: 'כ א',\n",
+ " 269: 'כ ב',\n",
+ " 270: 'כא א',\n",
+ " 271: 'כא ב',\n",
+ " 272: 'כא ג',\n",
+ " 273: 'כא ד',\n",
+ " 274: 'כא א',\n",
+ " 275: 'כא ג',\n",
+ " 276: 'כא ד',\n",
+ " 277: 'כב א',\n",
+ " 278: 'כב א',\n",
+ " 279: 'כג א',\n",
+ " 280: 'כג ב',\n",
+ " 281: 'כג ג',\n",
+ " 282: 'כג ד',\n",
+ " 283: 'כג א',\n",
+ " 284: 'כג ב',\n",
+ " 285: 'כג ג',\n",
+ " 286: 'כג ד',\n",
+ " 287: 'כד א',\n",
+ " 288: 'כד ב',\n",
+ " 289: 'כד ג',\n",
+ " 290: 'כד ד',\n",
+ " 291: 'כד ה',\n",
+ " 292: 'כד ו',\n",
+ " 293: 'כד א',\n",
+ " 294: 'כד ב',\n",
+ " 295: 'כד ד',\n",
+ " 296: 'כד ה',\n",
+ " 297: 'כד ו',\n",
+ " 298: 'כה א',\n",
+ " 299: 'כה ב',\n",
+ " 300: 'כה ג',\n",
+ " 301: 'כה ד',\n",
+ " 302: 'כה ה',\n",
+ " 303: 'כה ו',\n",
+ " 304: 'כה ז',\n",
+ " 305: 'כה ח',\n",
+ " 306: 'כה ט',\n",
+ " 307: 'כה י',\n",
+ " 308: 'כה יא',\n",
+ " 309: 'כה יב',\n",
+ " 310: 'כה יג',\n",
+ " 311: 'כה א',\n",
+ " 312: 'כה ב',\n",
+ " 313: 'כה ג',\n",
+ " 314: 'כה ה',\n",
+ " 315: 'כה ו',\n",
+ " 316: 'כה ז',\n",
+ " 317: 'כה ח',\n",
+ " 318: 'כה ט',\n",
+ " 319: 'כה י',\n",
+ " 320: 'כה יא',\n",
+ " 321: 'כה יב',\n",
+ " 322: 'כה יג',\n",
+ " 323: 'כו א',\n",
+ " 324: 'כו ב',\n",
+ " 325: 'כו א',\n",
+ " 326: 'כז א',\n",
+ " 327: 'כז ב',\n",
+ " 328: 'כז ג',\n",
+ " 329: 'כז ד',\n",
+ " 330: 'כז ה',\n",
+ " 331: 'כז ו',\n",
+ " 332: 'כז ז',\n",
+ " 333: 'כז ח',\n",
+ " 334: 'כז ט',\n",
+ " 335: 'כז י',\n",
+ " 336: 'כז יא',\n",
+ " 337: 'כז א',\n",
+ " 338: 'כז ב',\n",
+ " 339: 'כז ד',\n",
+ " 340: 'כז ה',\n",
+ " 341: 'כז ו',\n",
+ " 342: 'כז ז',\n",
+ " 343: 'כז ח',\n",
+ " 344: 'כז ט',\n",
+ " 345: 'כז י',\n",
+ " 346: 'כז יא',\n",
+ " 347: 'כח א',\n",
+ " 348: 'כח ב',\n",
+ " 349: 'כח ג',\n",
+ " 350: 'כח א',\n",
+ " 351: 'כח ב',\n",
+ " 352: 'כט א',\n",
+ " 353: 'כט א',\n",
+ " 354: 'ל א',\n",
+ " 355: 'ל ב',\n",
+ " 356: 'ל ג',\n",
+ " 357: 'ל ד',\n",
+ " 358: 'ל ה',\n",
+ " 359: 'ל א',\n",
+ " 360: 'ל ב',\n",
+ " 361: 'ל ג',\n",
+ " 362: 'ל ד',\n",
+ " 363: 'ל ה',\n",
+ " 364: 'לא א',\n",
+ " 365: 'לא ב',\n",
+ " 366: 'לא א',\n",
+ " 367: 'לא ב',\n",
+ " 368: 'לב א',\n",
+ " 369: 'לב ב',\n",
+ " 370: 'לב ג',\n",
+ " 371: 'לב ד',\n",
+ " 372: 'לב ה',\n",
+ " 373: 'לב ו',\n",
+ " 374: 'לב ז',\n",
+ " 375: 'לב ח',\n",
+ " 376: 'לב ט',\n",
+ " 377: 'לב טו',\n",
+ " 378: 'לב טז',\n",
+ " 379: 'לב י',\n",
+ " 380: 'לב יא',\n",
+ " 381: 'לב יב',\n",
+ " 382: 'לב יג',\n",
+ " 383: 'לב יד',\n",
+ " 384: 'לב יז',\n",
+ " 385: 'לב יח',\n",
+ " 386: 'לב יט',\n",
+ " 387: 'לב כ',\n",
+ " 388: 'לב כא',\n",
+ " 389: 'לב כב',\n",
+ " 390: 'לב כג',\n",
+ " 391: 'לב כד',\n",
+ " 392: 'לב כה',\n",
+ " 393: 'לב כו',\n",
+ " 394: 'לב כז',\n",
+ " 395: 'לב כח',\n",
+ " 396: 'לב כט',\n",
+ " 397: 'לב ל',\n",
+ " 398: 'לב לא',\n",
+ " 399: 'לב לב',\n",
+ " 400: 'לב לג',\n",
+ " 401: 'לב לד',\n",
+ " 402: 'לב לה',\n",
+ " 403: 'לב לו',\n",
+ " 404: 'לב לז',\n",
+ " 405: 'לב לח',\n",
+ " 406: 'לב לט',\n",
+ " 407: 'לב מ',\n",
+ " 408: 'לב מא',\n",
+ " 409: 'לב מב',\n",
+ " 410: 'לב מג',\n",
+ " 411: 'לב מד',\n",
+ " 412: 'לב מה',\n",
+ " 413: 'לב מו',\n",
+ " 414: 'לב מז',\n",
+ " 415: 'לב מח',\n",
+ " 416: 'לב מט',\n",
+ " 417: 'לב נ',\n",
+ " 418: 'לב נא',\n",
+ " 419: 'לב נב',\n",
+ " 420: 'לב א',\n",
+ " 421: 'לב ב',\n",
+ " 422: 'לב ג',\n",
+ " 423: 'לב ד',\n",
+ " 424: 'לב ה',\n",
+ " 425: 'לב ו',\n",
+ " 426: 'לב ז',\n",
+ " 427: 'לב ח',\n",
+ " 428: 'לב ט',\n",
+ " 429: 'לב טו',\n",
+ " 430: 'לב טז',\n",
+ " 431: 'לב י',\n",
+ " 432: 'לב יא',\n",
+ " 433: 'לב יב',\n",
+ " 434: 'לב יג',\n",
+ " 435: 'לב יד',\n",
+ " 436: 'לב יז',\n",
+ " 437: 'לב יח',\n",
+ " 438: 'לב יט',\n",
+ " 439: 'לב כ',\n",
+ " 440: 'לב כג',\n",
+ " 441: 'לב כד',\n",
+ " 442: 'לב כה',\n",
+ " 443: 'לב כז',\n",
+ " 444: 'לב כח',\n",
+ " 445: 'לב כט',\n",
+ " 446: 'לב לא',\n",
+ " 447: 'לב לב',\n",
+ " 448: 'לב לג',\n",
+ " 449: 'לב לה',\n",
+ " 450: 'לב לו',\n",
+ " 451: 'לב לז',\n",
+ " 452: 'לב לח',\n",
+ " 453: 'לב לט',\n",
+ " 454: 'לב מ',\n",
+ " 455: 'לב מא',\n",
+ " 456: 'לב מב',\n",
+ " 457: 'לב מד',\n",
+ " 458: 'לב מה',\n",
+ " 459: 'לב מז',\n",
+ " 460: 'לב מט',\n",
+ " 461: 'לב נ',\n",
+ " 462: 'לב נא',\n",
+ " 463: 'לב נב',\n",
+ " 464: 'לג א',\n",
+ " 465: 'לג ב',\n",
+ " 466: 'לג ג',\n",
+ " 467: 'לג ד',\n",
+ " 468: 'לג ה',\n",
+ " 469: 'לג א',\n",
+ " 470: 'לג ג',\n",
+ " 471: 'לג ד',\n",
+ " 472: 'לג ה',\n",
+ " 473: 'לד א',\n",
+ " 474: 'לד ב',\n",
+ " 475: 'לד ג',\n",
+ " 476: 'לד ד',\n",
+ " 477: 'לד א',\n",
+ " 478: 'לד ב',\n",
+ " 479: 'לד ג',\n",
+ " 480: 'לד ד',\n",
+ " 481: 'לה א',\n",
+ " 482: 'לה ד',\n",
+ " 483: 'לה א',\n",
+ " 484: 'לו א',\n",
+ " 485: 'לו ב',\n",
+ " 486: 'לו ג',\n",
+ " 487: 'לו א',\n",
+ " 488: 'לו ב',\n",
+ " 489: 'לו ג',\n",
+ " 490: 'לז א',\n",
+ " 491: 'לז ב',\n",
+ " 492: 'לז ג',\n",
+ " 493: 'לז א',\n",
+ " 494: 'לז ב',\n",
+ " 495: 'לז ג',\n",
+ " 496: 'לח א',\n",
+ " 497: 'לח ב',\n",
+ " 498: 'לח ג',\n",
+ " 499: 'לח ד',\n",
+ " 500: 'לח ה',\n",
+ " 501: 'לח ו',\n",
+ " 502: 'לח ז',\n",
+ " 503: 'לח ח',\n",
+ " 504: 'לח ט',\n",
+ " 505: 'לח י',\n",
+ " 506: 'לח יא',\n",
+ " 507: 'לח יב',\n",
+ " 508: 'לח יג',\n",
+ " 509: 'לח א',\n",
+ " 510: 'לח ב',\n",
+ " 511: 'לח ג',\n",
+ " 512: 'לח ד',\n",
+ " 513: 'לח ה',\n",
+ " 514: 'לח ז',\n",
+ " 515: 'לח ח',\n",
+ " 516: 'לח ט',\n",
+ " 517: 'לח י',\n",
+ " 518: 'לח יא',\n",
+ " 519: 'לח יב',\n",
+ " 520: 'לט א',\n",
+ " 521: 'לט ב',\n",
+ " 522: 'לט ג',\n",
+ " 523: 'לט ד',\n",
+ " 524: 'לט ה',\n",
+ " 525: 'לט ו',\n",
+ " 526: 'לט ז',\n",
+ " 527: 'לט ח',\n",
+ " 528: 'לט ט',\n",
+ " 529: 'לט י',\n",
+ " 530: 'לט א',\n",
+ " 531: 'לט ב',\n",
+ " 532: 'לט ג',\n",
+ " 533: 'לט ד',\n",
+ " 534: 'לט ו',\n",
+ " 535: 'לט ז',\n",
+ " 536: 'לט ח',\n",
+ " 537: 'לט ט',\n",
+ " 538: 'לט י',\n",
+ " 539: 'מ א',\n",
+ " 540: 'מ ב',\n",
+ " 541: 'מ ג',\n",
+ " 542: 'מ ד',\n",
+ " 543: 'מ ה',\n",
+ " 544: 'מ ו',\n",
+ " 545: 'מ ז',\n",
+ " 546: 'מ ח',\n",
+ " 547: 'מ א',\n",
+ " 548: 'מ ב',\n",
+ " 549: 'מ ג',\n",
+ " 550: 'מ ד',\n",
+ " 551: 'מ ו',\n",
+ " 552: 'מ ז',\n",
+ " 553: 'מא א',\n",
+ " 554: 'מא א',\n",
+ " 555: 'מב א',\n",
+ " 556: 'מב ב',\n",
+ " 557: 'מב ג',\n",
+ " 558: 'מב א',\n",
+ " 559: 'מב ג',\n",
+ " 560: 'מג א',\n",
+ " 561: 'מג ב',\n",
+ " 562: 'מג ג',\n",
+ " 563: 'מג ד',\n",
+ " 564: 'מג ה',\n",
+ " 565: 'מג ו',\n",
+ " 566: 'מג ז',\n",
+ " 567: 'מג ח',\n",
+ " 568: 'מג ט',\n",
+ " 569: 'מג א',\n",
+ " 570: 'מג ב',\n",
+ " 571: 'מג ג',\n",
+ " 572: 'מג ד',\n",
+ " 573: 'מג ה',\n",
+ " 574: 'מג ו',\n",
+ " 575: 'מג ז',\n",
+ " 576: 'מג ח',\n",
+ " 577: 'מד א',\n",
+ " 578: 'מד א',\n",
+ " 579: 'מה א',\n",
+ " 580: 'מה ב',\n",
+ " 581: 'מה א',\n",
+ " 582: 'מה ב',\n",
+ " 583: 'מו א',\n",
+ " 584: 'מו ב',\n",
+ " 585: 'מו ג',\n",
+ " 586: 'מו ד',\n",
+ " 587: 'מו ה',\n",
+ " 588: 'מו ו',\n",
+ " 589: 'מו ז',\n",
+ " 590: 'מו ח',\n",
+ " 591: 'מו ט',\n",
+ " 592: 'מו א',\n",
+ " 593: 'מו ב',\n",
+ " 594: 'מו ג',\n",
+ " 595: 'מו ד',\n",
+ " 596: 'מו ה',\n",
+ " 597: 'מו ו',\n",
+ " 598: 'מו ז',\n",
+ " 599: 'מו ח',\n",
+ " 600: 'מו ט',\n",
+ " 601: 'מז א',\n",
+ " 602: 'מז ב',\n",
+ " 603: 'מז ג',\n",
+ " 604: 'מז ד',\n",
+ " 605: 'מז ה',\n",
+ " 606: 'מז ו',\n",
+ " 607: 'מז ז',\n",
+ " 608: 'מז ח',\n",
+ " 609: 'מז ט',\n",
+ " 610: 'מז י',\n",
+ " 611: 'מז יא',\n",
+ " 612: 'מז יב',\n",
+ " 613: 'מז יג',\n",
+ " 614: 'מז יד',\n",
+ " 615: 'מז א',\n",
+ " 616: 'מז ג',\n",
+ " 617: 'מז ד',\n",
+ " 618: 'מז ה',\n",
+ " 619: 'מז ו',\n",
+ " 620: 'מז ז',\n",
+ " 621: 'מז ח',\n",
+ " 622: 'מז ט',\n",
+ " 623: 'מז י',\n",
+ " 624: 'מז יא',\n",
+ " 625: 'מז יב',\n",
+ " 626: 'מז יג',\n",
+ " 627: 'מז יד',\n",
+ " 628: 'מח *',\n",
+ " 629: 'מח א',\n",
+ " 630: 'מח יד',\n",
+ " 631: 'מח א',\n",
+ " 632: 'מט א',\n",
+ " 633: 'מט א',\n",
+ " 634: 'נ א',\n",
+ " 635: 'נ א',\n",
+ " 636: 'נא א',\n",
+ " 637: 'נא ב',\n",
+ " 638: 'נא ג',\n",
+ " 639: 'נא ד',\n",
+ " 640: 'נא ה',\n",
+ " 641: 'נא ו',\n",
+ " 642: 'נא ז',\n",
+ " 643: 'נא ח',\n",
+ " 644: 'נא ט',\n",
+ " 645: 'נא א',\n",
+ " 646: 'נא ב',\n",
+ " 647: 'נא ג',\n",
+ " 648: 'נא ד',\n",
+ " 649: 'נא ה',\n",
+ " 650: 'נא ו',\n",
+ " 651: 'נא ז',\n",
+ " 652: 'נא ט',\n",
+ " 653: 'נב א',\n",
+ " 654: 'נב א',\n",
+ " 655: 'נג א',\n",
+ " 656: 'נג ב',\n",
+ " 657: 'נג ג',\n",
+ " 658: 'נג ד',\n",
+ " 659: 'נג ה',\n",
+ " 660: 'נג ו',\n",
+ " 661: 'נג ז',\n",
+ " 662: 'נג ח',\n",
+ " 663: 'נג ט',\n",
+ " 664: 'נג טו',\n",
+ " 665: 'נג טז',\n",
+ " 666: 'נג י',\n",
+ " 667: 'נג יא',\n",
+ " 668: 'נג יב',\n",
+ " 669: 'נג יג',\n",
+ " 670: 'נג יד',\n",
+ " 671: 'נג יז',\n",
+ " 672: 'נג יח',\n",
+ " 673: 'נג יט',\n",
+ " 674: 'נג כ',\n",
+ " 675: 'נג כא',\n",
+ " 676: 'נג כב',\n",
+ " 677: 'נג כג',\n",
+ " 678: 'נג כד',\n",
+ " 679: 'נג כה',\n",
+ " 680: 'נג כו',\n",
+ " 681: 'נג א',\n",
+ " 682: 'נג ב',\n",
+ " 683: 'נג ג',\n",
+ " 684: 'נג ד',\n",
+ " 685: 'נג ה',\n",
+ " 686: 'נג ו',\n",
+ " 687: 'נג ט',\n",
+ " 688: 'נג טז',\n",
+ " 689: 'נג י',\n",
+ " 690: 'נג יא',\n",
+ " 691: 'נג יב',\n",
+ " 692: 'נג יד',\n",
+ " 693: 'נג יח',\n",
+ " 694: 'נג יט',\n",
+ " 695: 'נג כ',\n",
+ " 696: 'נג כא',\n",
+ " 697: 'נג כב',\n",
+ " 698: 'נג כג',\n",
+ " 699: 'נג כה',\n",
+ " 700: 'נג כו',\n",
+ " 701: 'נד א',\n",
+ " 702: 'נד ב',\n",
+ " 703: 'נד ג',\n",
+ " 704: 'נד א',\n",
+ " 705: 'נד ב',\n",
+ " 706: 'נד ג',\n",
+ " 707: 'נה א',\n",
+ " 708: 'נה ב',\n",
+ " 709: 'נה ג',\n",
+ " 710: 'נה ד',\n",
+ " 711: 'נה ה',\n",
+ " 712: 'נה ו',\n",
+ " 713: 'נה ז',\n",
+ " 714: 'נה ח',\n",
+ " 715: 'נה ט',\n",
+ " 716: 'נה טו',\n",
+ " 717: 'נה טז',\n",
+ " 718: 'נה י',\n",
+ " 719: 'נה יא',\n",
+ " 720: 'נה יב',\n",
+ " 721: 'נה יג',\n",
+ " 722: 'נה יד',\n",
+ " 723: 'נה יז',\n",
+ " 724: 'נה יח',\n",
+ " 725: 'נה יט',\n",
+ " 726: 'נה כ',\n",
+ " 727: 'נה כא',\n",
+ " 728: 'נה כב',\n",
+ " 729: 'נה א',\n",
+ " 730: 'נה ב',\n",
+ " 731: 'נה ג',\n",
+ " 732: 'נה ד',\n",
+ " 733: 'נה ה',\n",
+ " 734: 'נה ו',\n",
+ " 735: 'נה ז',\n",
+ " 736: 'נה טז',\n",
+ " 737: 'נה י',\n",
+ " 738: 'נה יג',\n",
+ " 739: 'נה יד',\n",
+ " 740: 'נה יט',\n",
+ " 741: 'נה כ',\n",
+ " 742: 'נה כא',\n",
+ " 743: 'נה כב',\n",
+ " 744: 'נו א',\n",
+ " 745: 'נו ב',\n",
+ " 746: 'נו ג',\n",
+ " 747: 'נו ד',\n",
+ " 748: 'נו ה',\n",
+ " 749: 'נו א',\n",
+ " 750: 'נו ב',\n",
+ " 751: 'נו ג',\n",
+ " 752: 'נז א',\n",
+ " 753: 'נז ב',\n",
+ " 754: 'נז א',\n",
+ " 755: 'נח א',\n",
+ " 756: 'נח ב',\n",
+ " 757: 'נח ג',\n",
+ " 758: 'נח ד',\n",
+ " 759: 'נח ה',\n",
+ " 760: 'נח ו',\n",
+ " 761: 'נח ז',\n",
+ " 762: 'נח א',\n",
+ " 763: 'נח ב',\n",
+ " 764: 'נח ג',\n",
+ " 765: 'נח ד',\n",
+ " 766: 'נח ה',\n",
+ " 767: 'נח ו',\n",
+ " 768: 'נח ז',\n",
+ " 769: 'נט א',\n",
+ " 770: 'נט ב',\n",
+ " 771: 'נט ג',\n",
+ " 772: 'נט ד',\n",
+ " 773: 'נט ה',\n",
+ " 774: 'נט א',\n",
+ " 775: 'נט ב',\n",
+ " 776: 'נט ג',\n",
+ " 777: 'נט ד',\n",
+ " 778: 'נט ה',\n",
+ " 779: 'ס א',\n",
+ " 780: 'ס ב',\n",
+ " 781: 'ס ג',\n",
+ " 782: 'ס ד',\n",
+ " 783: 'ס ה',\n",
+ " 784: 'ס א',\n",
+ " 785: 'ס ב',\n",
+ " 786: 'ס ג',\n",
+ " 787: 'ס ד',\n",
+ " 788: 'ס ה',\n",
+ " 789: 'סא א',\n",
+ " 790: 'סא ב',\n",
+ " 791: 'סא ג',\n",
+ " 792: 'סא ד',\n",
+ " 793: 'סא ה',\n",
+ " 794: 'סא ו',\n",
+ " 795: 'סא ז',\n",
+ " 796: 'סא ח',\n",
+ " 797: 'סא ט',\n",
+ " 798: 'סא טו',\n",
+ " 799: 'סא טז',\n",
+ " 800: 'סא י',\n",
+ " 801: 'סא יא',\n",
+ " 802: 'סא יב',\n",
+ " 803: 'סא יג',\n",
+ " 804: 'סא יד',\n",
+ " 805: 'סא יז',\n",
+ " 806: 'סא יח',\n",
+ " 807: 'סא יט',\n",
+ " 808: 'סא כ',\n",
+ " 809: 'סא כא',\n",
+ " 810: 'סא כב',\n",
+ " 811: 'סא כג',\n",
+ " 812: 'סא כד',\n",
+ " 813: 'סא כה',\n",
+ " 814: 'סא כו',\n",
+ " 815: 'סא א',\n",
+ " 816: 'סא ג',\n",
+ " 817: 'סא ה',\n",
+ " 818: 'סא ו',\n",
+ " 819: 'סא ז',\n",
+ " 820: 'סא ט',\n",
+ " 821: 'סא י',\n",
+ " 822: 'סא יא',\n",
+ " 823: 'סא יב',\n",
+ " 824: 'סא יג',\n",
+ " 825: 'סא יד',\n",
+ " 826: 'סא כ',\n",
+ " 827: 'סא כא',\n",
+ " 828: 'סא כג',\n",
+ " 829: 'סא כד',\n",
+ " 830: 'סא כה',\n",
+ " 831: 'סא כו',\n",
+ " 832: 'סב א',\n",
+ " 833: 'סב ב',\n",
+ " 834: 'סב ג',\n",
+ " 835: 'סב ד',\n",
+ " 836: 'סב ה',\n",
+ " 837: 'סב א',\n",
+ " 838: 'סב ב',\n",
+ " 839: 'סב ד',\n",
+ " 840: 'סב ה',\n",
+ " 841: 'סג א',\n",
+ " 842: 'סג ב',\n",
+ " 843: 'סג ג',\n",
+ " 844: 'סג ד',\n",
+ " 845: 'סג ה',\n",
+ " 846: 'סג ו',\n",
+ " 847: 'סג ז',\n",
+ " 848: 'סג ח',\n",
+ " 849: 'סג ט',\n",
+ " 850: 'סג א',\n",
+ " 851: 'סג ב',\n",
+ " 852: 'סג ג',\n",
+ " 853: 'סג ד',\n",
+ " 854: 'סג ה',\n",
+ " 855: 'סג ו',\n",
+ " 856: 'סד א',\n",
+ " 857: 'סד ב',\n",
+ " 858: 'סד ג',\n",
+ " 859: 'סד ד',\n",
+ " 860: 'סד ב',\n",
+ " 861: 'סד ד',\n",
+ " 862: 'סה א',\n",
+ " 863: 'סה ב',\n",
+ " 864: 'סה ג',\n",
+ " 865: 'סה א',\n",
+ " 866: 'סה ב',\n",
+ " 867: 'סו א',\n",
+ " 868: 'סו ב',\n",
+ " 869: 'סו ג',\n",
+ " 870: 'סו ד',\n",
+ " 871: 'סו ה',\n",
+ " 872: 'סו ו',\n",
+ " 873: 'סו ז',\n",
+ " 874: 'סו ח',\n",
+ " 875: 'סו ט',\n",
+ " 876: 'סו י',\n",
+ " 877: 'סו א',\n",
+ " 878: 'סו ב',\n",
+ " 879: 'סו ג',\n",
+ " 880: 'סו ד',\n",
+ " 881: 'סו ה',\n",
+ " 882: 'סו ז',\n",
+ " 883: 'סו ח',\n",
+ " 884: 'סו י',\n",
+ " 885: 'סז א',\n",
+ " 886: 'סז א',\n",
+ " 887: 'סח א',\n",
+ " 888: 'סח א',\n",
+ " 889: 'סט א',\n",
+ " 890: 'סט ב',\n",
+ " 891: 'סט א',\n",
+ " 892: 'סט ב',\n",
+ " 893: 'ע א',\n",
+ " 894: 'ע ב',\n",
+ " 895: 'ע ג',\n",
+ " 896: 'ע ד',\n",
+ " 897: 'ע ה',\n",
+ " 898: 'ע א',\n",
+ " 899: 'ע ג',\n",
+ " 900: 'ע ד',\n",
+ " 901: 'ע ה',\n",
+ " 902: 'עא א',\n",
+ " 903: 'עא ב',\n",
+ " 904: 'עא ג',\n",
+ " 905: 'עא ד',\n",
+ " 906: 'עא ה',\n",
+ " 907: 'עא ו',\n",
+ " 908: 'עא ז',\n",
+ " 909: 'עא א',\n",
+ " 910: 'עא ב',\n",
+ " 911: 'עא ז',\n",
+ " 912: 'עב א',\n",
+ " 913: 'עב ב',\n",
+ " 914: 'עב ג',\n",
+ " 915: 'עב ד',\n",
+ " 916: 'עב ה',\n",
+ " 917: 'עב א',\n",
+ " 918: 'עב ב',\n",
+ " 919: 'עב ג',\n",
+ " 920: 'עב ד',\n",
+ " 921: 'עב ה',\n",
+ " 922: 'עג א',\n",
+ " 923: 'עג ב',\n",
+ " 924: 'עג ג',\n",
+ " 925: 'עג ד',\n",
+ " 926: 'עג א',\n",
+ " 927: 'עג ב',\n",
+ " 928: 'עג ג',\n",
+ " 929: 'עג ד',\n",
+ " 930: 'עד א',\n",
+ " 931: 'עד ב',\n",
+ " 932: 'עד ג',\n",
+ " 933: 'עד ד',\n",
+ " 934: 'עד ה',\n",
+ " 935: 'עד ו',\n",
+ " 936: 'עד א',\n",
+ " 937: 'עד ב',\n",
+ " 938: 'עד ג',\n",
+ " 939: 'עד ד',\n",
+ " 940: 'עד ה',\n",
+ " 941: 'עד ו',\n",
+ " 942: 'עה א',\n",
+ " 943: 'עה ב',\n",
+ " 944: 'עה ג',\n",
+ " 945: 'עה ד',\n",
+ " 946: 'עה ה',\n",
+ " 947: 'עה ו',\n",
+ " 948: 'עה א',\n",
+ " 949: 'עה ב',\n",
+ " 950: 'עה ג',\n",
+ " 951: 'עה ד',\n",
+ " 952: 'עה ו',\n",
+ " 953: 'עו א',\n",
+ " 954: 'עו ב',\n",
+ " 955: 'עו ג',\n",
+ " 956: 'עו ד',\n",
+ " 957: 'עו ה',\n",
+ " 958: 'עו ו',\n",
+ " 959: 'עו ז',\n",
+ " 960: 'עו ח',\n",
+ " 961: 'עו ב',\n",
+ " 962: 'עו ג',\n",
+ " 963: 'עו ד',\n",
+ " 964: 'עו ה',\n",
+ " 965: 'עו ו',\n",
+ " 966: 'עו ז',\n",
+ " 967: 'עו ח',\n",
+ " 968: 'עז א',\n",
+ " 969: 'עז ב',\n",
+ " 970: 'עז ב',\n",
+ " 971: 'עח א',\n",
+ " 972: 'עח א',\n",
+ " 973: 'עט א',\n",
+ " 974: 'עט ב',\n",
+ " 975: 'עט ג',\n",
+ " 976: 'עט ד',\n",
+ " 977: 'עט ה',\n",
+ " 978: 'עט ו',\n",
+ " 979: 'עט ז',\n",
+ " 980: 'עט ח',\n",
+ " 981: 'עט ט',\n",
+ " 982: 'עט א',\n",
+ " 983: 'עט ב',\n",
+ " 984: 'עט ג',\n",
+ " 985: 'עט ה',\n",
+ " 986: 'עט ו',\n",
+ " 987: 'עט ז',\n",
+ " 988: 'עט ט',\n",
+ " 989: 'פ א',\n",
+ " 990: 'פ א',\n",
+ " 991: 'פא א',\n",
+ " 992: 'פא ב',\n",
+ " 993: 'פא א',\n",
+ " 994: 'פא ב',\n",
+ " 995: 'פב א',\n",
+ " 996: 'פב ב',\n",
+ " 997: 'פב א',\n",
+ " 998: 'פב ב',\n",
+ " 999: 'פג א',\n",
+ " ...}"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "id2label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.453902Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.453663Z",
+ "iopub.status.idle": "2023-12-05T04:05:00.479898Z",
+ "shell.execute_reply": "2023-12-05T04:05:00.478975Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.453877Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'א א': 0,\n",
+ " 'א ב': 1,\n",
+ " 'א ג': 2,\n",
+ " 'א ד': 3,\n",
+ " 'א ה': 4,\n",
+ " 'א ו': 5,\n",
+ " 'א ז': 6,\n",
+ " 'א ח': 7,\n",
+ " 'א ט': 8,\n",
+ " 'א א': 9,\n",
+ " 'א ב': 10,\n",
+ " 'א ג': 11,\n",
+ " 'א ד': 12,\n",
+ " 'א ה': 13,\n",
+ " 'א ו': 14,\n",
+ " 'א ז': 15,\n",
+ " 'א ח': 16,\n",
+ " 'א ט': 17,\n",
+ " 'ב א': 18,\n",
+ " 'ב ב': 19,\n",
+ " 'ב ג': 20,\n",
+ " 'ב ד': 21,\n",
+ " 'ב ה': 22,\n",
+ " 'ב ו': 23,\n",
+ " 'ב א': 24,\n",
+ " 'ב ב': 25,\n",
+ " 'ב ג': 26,\n",
+ " 'ב ד': 27,\n",
+ " 'ב ו': 28,\n",
+ " 'ג א': 29,\n",
+ " 'ג ב': 30,\n",
+ " 'ג ג': 31,\n",
+ " 'ג ד': 32,\n",
+ " 'ג ה': 33,\n",
+ " 'ג ו': 34,\n",
+ " 'ג ז': 35,\n",
+ " 'ג ח': 36,\n",
+ " 'ג ט': 37,\n",
+ " 'ג טו': 38,\n",
+ " 'ג טז': 39,\n",
+ " 'ג י': 40,\n",
+ " 'ג יא': 41,\n",
+ " 'ג יב': 42,\n",
+ " 'ג יג': 43,\n",
+ " 'ג יד': 44,\n",
+ " 'ג יז': 45,\n",
+ " 'ג א': 46,\n",
+ " 'ג ב': 47,\n",
+ " 'ג ג': 48,\n",
+ " 'ג ד': 49,\n",
+ " 'ג ה': 50,\n",
+ " 'ג ו': 51,\n",
+ " 'ג ז': 52,\n",
+ " 'ג ח': 53,\n",
+ " 'ג ט': 54,\n",
+ " 'ג טז': 55,\n",
+ " 'ג י': 56,\n",
+ " 'ג יא': 57,\n",
+ " 'ג יב': 58,\n",
+ " 'ג יג': 59,\n",
+ " 'ג יד': 60,\n",
+ " 'ג יז': 61,\n",
+ " 'ד א': 62,\n",
+ " 'ד ב': 63,\n",
+ " 'ד ג': 64,\n",
+ " 'ד ד': 65,\n",
+ " 'ד ה': 66,\n",
+ " 'ד ו': 67,\n",
+ " 'ד ז': 68,\n",
+ " 'ד ח': 69,\n",
+ " 'ד ט': 70,\n",
+ " 'ד טו': 71,\n",
+ " 'ד טז': 72,\n",
+ " 'ד י': 73,\n",
+ " 'ד יא': 74,\n",
+ " 'ד יב': 75,\n",
+ " 'ד יג': 76,\n",
+ " 'ד יד': 77,\n",
+ " 'ד יז': 78,\n",
+ " 'ד יח': 79,\n",
+ " 'ד יט': 80,\n",
+ " 'ד כ': 81,\n",
+ " 'ד כא': 82,\n",
+ " 'ד כב': 83,\n",
+ " 'ד כג': 84,\n",
+ " 'ד א': 85,\n",
+ " 'ד ב': 86,\n",
+ " 'ד ד': 87,\n",
+ " 'ד ז': 88,\n",
+ " 'ד ח': 89,\n",
+ " 'ד טו': 90,\n",
+ " 'ד טז': 91,\n",
+ " 'ד י': 92,\n",
+ " 'ד יא': 93,\n",
+ " 'ד יב': 94,\n",
+ " 'ד יג': 95,\n",
+ " 'ד יד': 96,\n",
+ " 'ד יח': 97,\n",
+ " 'ד כא': 98,\n",
+ " 'ד כב': 99,\n",
+ " 'ד כג': 100,\n",
+ " 'ה א': 101,\n",
+ " 'ה א': 102,\n",
+ " 'ו א': 103,\n",
+ " 'ו ב': 104,\n",
+ " 'ו ג': 105,\n",
+ " 'ו ד': 106,\n",
+ " 'ו א': 107,\n",
+ " 'ו ב': 108,\n",
+ " 'ו ג': 109,\n",
+ " 'ו ד': 110,\n",
+ " 'ז א': 111,\n",
+ " 'ז ב': 112,\n",
+ " 'ז ג': 113,\n",
+ " 'ז ד': 114,\n",
+ " 'ז א': 115,\n",
+ " 'ז ב': 116,\n",
+ " 'ז ג': 117,\n",
+ " 'ח א': 118,\n",
+ " 'ח ב': 119,\n",
+ " 'ח ג': 120,\n",
+ " 'ח ד': 121,\n",
+ " 'ח ה': 122,\n",
+ " 'ח ו': 123,\n",
+ " 'ח ז': 124,\n",
+ " 'ח ח': 125,\n",
+ " 'ח ט': 126,\n",
+ " 'ח טו': 127,\n",
+ " 'ח טז': 128,\n",
+ " 'ח י': 129,\n",
+ " 'ח יא': 130,\n",
+ " 'ח יב': 131,\n",
+ " 'ח יג': 132,\n",
+ " 'ח יד': 133,\n",
+ " 'ח יז': 134,\n",
+ " 'ח א': 135,\n",
+ " 'ח ב': 136,\n",
+ " 'ח ג': 137,\n",
+ " 'ח ד': 138,\n",
+ " 'ח ה': 139,\n",
+ " 'ח ו': 140,\n",
+ " 'ח ז': 141,\n",
+ " 'ח ט': 142,\n",
+ " 'ח טו': 143,\n",
+ " 'ח טז': 144,\n",
+ " 'ח י': 145,\n",
+ " 'ח יא': 146,\n",
+ " 'ח יב': 147,\n",
+ " 'ח יג': 148,\n",
+ " 'ח יד': 149,\n",
+ " 'ח יז': 150,\n",
+ " 'ט א': 151,\n",
+ " 'ט ב': 152,\n",
+ " 'ט ג': 153,\n",
+ " 'ט ד': 154,\n",
+ " 'ט ה': 155,\n",
+ " 'ט ו': 156,\n",
+ " 'ט א': 157,\n",
+ " 'ט ב': 158,\n",
+ " 'ט ג': 159,\n",
+ " 'ט ד': 160,\n",
+ " 'ט ה': 161,\n",
+ " 'ט ו': 162,\n",
+ " 'טו א': 163,\n",
+ " 'טו ב': 164,\n",
+ " 'טו ג': 165,\n",
+ " 'טו ד': 166,\n",
+ " 'טו ה': 167,\n",
+ " 'טו ו': 168,\n",
+ " 'טו א': 169,\n",
+ " 'טו ב': 170,\n",
+ " 'טו ג': 171,\n",
+ " 'טו ד': 172,\n",
+ " 'טו ה': 173,\n",
+ " 'טו ו': 174,\n",
+ " 'טז א': 175,\n",
+ " 'טז א': 176,\n",
+ " 'י א': 177,\n",
+ " 'י ב': 178,\n",
+ " 'י ג': 179,\n",
+ " 'י ד': 180,\n",
+ " 'י ה': 181,\n",
+ " 'י ו': 182,\n",
+ " 'י ז': 183,\n",
+ " 'י ח': 184,\n",
+ " 'י ט': 185,\n",
+ " 'י י': 186,\n",
+ " 'י יא': 187,\n",
+ " 'י יב': 188,\n",
+ " 'י א': 189,\n",
+ " 'י ב': 190,\n",
+ " 'י ג': 191,\n",
+ " 'י ד': 192,\n",
+ " 'י ה': 193,\n",
+ " 'י ו': 194,\n",
+ " 'י ז': 195,\n",
+ " 'י ח': 196,\n",
+ " 'י יא': 197,\n",
+ " 'י יב': 198,\n",
+ " 'יא א': 199,\n",
+ " 'יא ב': 200,\n",
+ " 'יא ג': 201,\n",
+ " 'יא ד': 202,\n",
+ " 'יא ה': 203,\n",
+ " 'יא ו': 204,\n",
+ " 'יא ז': 205,\n",
+ " 'יא ח': 206,\n",
+ " 'יא ט': 207,\n",
+ " 'יא טו': 208,\n",
+ " 'יא י': 209,\n",
+ " 'יא יא': 210,\n",
+ " 'יא יב': 211,\n",
+ " 'יא יג': 212,\n",
+ " 'יא יד': 213,\n",
+ " 'יא א': 214,\n",
+ " 'יא ב': 215,\n",
+ " 'יא ג': 216,\n",
+ " 'יא ד': 217,\n",
+ " 'יא ה': 218,\n",
+ " 'יא ו': 219,\n",
+ " 'יא ז': 220,\n",
+ " 'יא ח': 221,\n",
+ " 'יא ט': 222,\n",
+ " 'יא טו': 223,\n",
+ " 'יא י': 224,\n",
+ " 'יא יא': 225,\n",
+ " 'יא יב': 226,\n",
+ " 'יא יג': 227,\n",
+ " 'יא יד': 228,\n",
+ " 'יב א': 229,\n",
+ " 'יב ב': 230,\n",
+ " 'יב ג': 231,\n",
+ " 'יב א': 232,\n",
+ " 'יב ב': 233,\n",
+ " 'יב ג': 234,\n",
+ " 'יג א': 235,\n",
+ " 'יג ב': 236,\n",
+ " 'יג ג': 237,\n",
+ " 'יג א': 238,\n",
+ " 'יג ב': 239,\n",
+ " 'יג ג': 240,\n",
+ " 'יד א': 241,\n",
+ " 'יד ב': 242,\n",
+ " 'יד ג': 243,\n",
+ " 'יד ד': 244,\n",
+ " 'יד ה': 245,\n",
+ " 'יד א': 246,\n",
+ " 'יד ב': 247,\n",
+ " 'יד ג': 248,\n",
+ " 'יד ד': 249,\n",
+ " 'יז א': 250,\n",
+ " 'יז ב': 251,\n",
+ " 'יז ג': 252,\n",
+ " 'יז א': 253,\n",
+ " 'יז ב': 254,\n",
+ " 'יז ג': 255,\n",
+ " 'יח א': 256,\n",
+ " 'יח ב': 257,\n",
+ " 'יח ג': 258,\n",
+ " 'יח א': 259,\n",
+ " 'יח ב': 260,\n",
+ " 'יח ג': 261,\n",
+ " 'יט א': 262,\n",
+ " 'יט ב': 263,\n",
+ " 'יט א': 264,\n",
+ " 'יט ב': 265,\n",
+ " 'כ א': 266,\n",
+ " 'כ ב': 267,\n",
+ " 'כ א': 268,\n",
+ " 'כ ב': 269,\n",
+ " 'כא א': 270,\n",
+ " 'כא ב': 271,\n",
+ " 'כא ג': 272,\n",
+ " 'כא ד': 273,\n",
+ " 'כא א': 274,\n",
+ " 'כא ג': 275,\n",
+ " 'כא ד': 276,\n",
+ " 'כב א': 277,\n",
+ " 'כב א': 278,\n",
+ " 'כג א': 279,\n",
+ " 'כג ב': 280,\n",
+ " 'כג ג': 281,\n",
+ " 'כג ד': 282,\n",
+ " 'כג א': 283,\n",
+ " 'כג ב': 284,\n",
+ " 'כג ג': 285,\n",
+ " 'כג ד': 286,\n",
+ " 'כד א': 287,\n",
+ " 'כד ב': 288,\n",
+ " 'כד ג': 289,\n",
+ " 'כד ד': 290,\n",
+ " 'כד ה': 291,\n",
+ " 'כד ו': 292,\n",
+ " 'כד א': 293,\n",
+ " 'כד ב': 294,\n",
+ " 'כד ד': 295,\n",
+ " 'כד ה': 296,\n",
+ " 'כד ו': 297,\n",
+ " 'כה א': 298,\n",
+ " 'כה ב': 299,\n",
+ " 'כה ג': 300,\n",
+ " 'כה ד': 301,\n",
+ " 'כה ה': 302,\n",
+ " 'כה ו': 303,\n",
+ " 'כה ז': 304,\n",
+ " 'כה ח': 305,\n",
+ " 'כה ט': 306,\n",
+ " 'כה י': 307,\n",
+ " 'כה יא': 308,\n",
+ " 'כה יב': 309,\n",
+ " 'כה יג': 310,\n",
+ " 'כה א': 311,\n",
+ " 'כה ב': 312,\n",
+ " 'כה ג': 313,\n",
+ " 'כה ה': 314,\n",
+ " 'כה ו': 315,\n",
+ " 'כה ז': 316,\n",
+ " 'כה ח': 317,\n",
+ " 'כה ט': 318,\n",
+ " 'כה י': 319,\n",
+ " 'כה יא': 320,\n",
+ " 'כה יב': 321,\n",
+ " 'כה יג': 322,\n",
+ " 'כו א': 323,\n",
+ " 'כו ב': 324,\n",
+ " 'כו א': 325,\n",
+ " 'כז א': 326,\n",
+ " 'כז ב': 327,\n",
+ " 'כז ג': 328,\n",
+ " 'כז ד': 329,\n",
+ " 'כז ה': 330,\n",
+ " 'כז ו': 331,\n",
+ " 'כז ז': 332,\n",
+ " 'כז ח': 333,\n",
+ " 'כז ט': 334,\n",
+ " 'כז י': 335,\n",
+ " 'כז יא': 336,\n",
+ " 'כז א': 337,\n",
+ " 'כז ב': 338,\n",
+ " 'כז ד': 339,\n",
+ " 'כז ה': 340,\n",
+ " 'כז ו': 341,\n",
+ " 'כז ז': 342,\n",
+ " 'כז ח': 343,\n",
+ " 'כז ט': 344,\n",
+ " 'כז י': 345,\n",
+ " 'כז יא': 346,\n",
+ " 'כח א': 347,\n",
+ " 'כח ב': 348,\n",
+ " 'כח ג': 349,\n",
+ " 'כח א': 350,\n",
+ " 'כח ב': 351,\n",
+ " 'כט א': 352,\n",
+ " 'כט א': 353,\n",
+ " 'ל א': 354,\n",
+ " 'ל ב': 355,\n",
+ " 'ל ג': 356,\n",
+ " 'ל ד': 357,\n",
+ " 'ל ה': 358,\n",
+ " 'ל א': 359,\n",
+ " 'ל ב': 360,\n",
+ " 'ל ג': 361,\n",
+ " 'ל ד': 362,\n",
+ " 'ל ה': 363,\n",
+ " 'לא א': 364,\n",
+ " 'לא ב': 365,\n",
+ " 'לא א': 366,\n",
+ " 'לא ב': 367,\n",
+ " 'לב א': 368,\n",
+ " 'לב ב': 369,\n",
+ " 'לב ג': 370,\n",
+ " 'לב ד': 371,\n",
+ " 'לב ה': 372,\n",
+ " 'לב ו': 373,\n",
+ " 'לב ז': 374,\n",
+ " 'לב ח': 375,\n",
+ " 'לב ט': 376,\n",
+ " 'לב טו': 377,\n",
+ " 'לב טז': 378,\n",
+ " 'לב י': 379,\n",
+ " 'לב יא': 380,\n",
+ " 'לב יב': 381,\n",
+ " 'לב יג': 382,\n",
+ " 'לב יד': 383,\n",
+ " 'לב יז': 384,\n",
+ " 'לב יח': 385,\n",
+ " 'לב יט': 386,\n",
+ " 'לב כ': 387,\n",
+ " 'לב כא': 388,\n",
+ " 'לב כב': 389,\n",
+ " 'לב כג': 390,\n",
+ " 'לב כד': 391,\n",
+ " 'לב כה': 392,\n",
+ " 'לב כו': 393,\n",
+ " 'לב כז': 394,\n",
+ " 'לב כח': 395,\n",
+ " 'לב כט': 396,\n",
+ " 'לב ל': 397,\n",
+ " 'לב לא': 398,\n",
+ " 'לב לב': 399,\n",
+ " 'לב לג': 400,\n",
+ " 'לב לד': 401,\n",
+ " 'לב לה': 402,\n",
+ " 'לב לו': 403,\n",
+ " 'לב לז': 404,\n",
+ " 'לב לח': 405,\n",
+ " 'לב לט': 406,\n",
+ " 'לב מ': 407,\n",
+ " 'לב מא': 408,\n",
+ " 'לב מב': 409,\n",
+ " 'לב מג': 410,\n",
+ " 'לב מד': 411,\n",
+ " 'לב מה': 412,\n",
+ " 'לב מו': 413,\n",
+ " 'לב מז': 414,\n",
+ " 'לב מח': 415,\n",
+ " 'לב מט': 416,\n",
+ " 'לב נ': 417,\n",
+ " 'לב נא': 418,\n",
+ " 'לב נב': 419,\n",
+ " 'לב א': 420,\n",
+ " 'לב ב': 421,\n",
+ " 'לב ג': 422,\n",
+ " 'לב ד': 423,\n",
+ " 'לב ה': 424,\n",
+ " 'לב ו': 425,\n",
+ " 'לב ז': 426,\n",
+ " 'לב ח': 427,\n",
+ " 'לב ט': 428,\n",
+ " 'לב טו': 429,\n",
+ " 'לב טז': 430,\n",
+ " 'לב י': 431,\n",
+ " 'לב יא': 432,\n",
+ " 'לב יב': 433,\n",
+ " 'לב יג': 434,\n",
+ " 'לב יד': 435,\n",
+ " 'לב יז': 436,\n",
+ " 'לב יח': 437,\n",
+ " 'לב יט': 438,\n",
+ " 'לב כ': 439,\n",
+ " 'לב כג': 440,\n",
+ " 'לב כד': 441,\n",
+ " 'לב כה': 442,\n",
+ " 'לב כז': 443,\n",
+ " 'לב כח': 444,\n",
+ " 'לב כט': 445,\n",
+ " 'לב לא': 446,\n",
+ " 'לב לב': 447,\n",
+ " 'לב לג': 448,\n",
+ " 'לב לה': 449,\n",
+ " 'לב לו': 450,\n",
+ " 'לב לז': 451,\n",
+ " 'לב לח': 452,\n",
+ " 'לב לט': 453,\n",
+ " 'לב מ': 454,\n",
+ " 'לב מא': 455,\n",
+ " 'לב מב': 456,\n",
+ " 'לב מד': 457,\n",
+ " 'לב מה': 458,\n",
+ " 'לב מז': 459,\n",
+ " 'לב מט': 460,\n",
+ " 'לב נ': 461,\n",
+ " 'לב נא': 462,\n",
+ " 'לב נב': 463,\n",
+ " 'לג א': 464,\n",
+ " 'לג ב': 465,\n",
+ " 'לג ג': 466,\n",
+ " 'לג ד': 467,\n",
+ " 'לג ה': 468,\n",
+ " 'לג א': 469,\n",
+ " 'לג ג': 470,\n",
+ " 'לג ד': 471,\n",
+ " 'לג ה': 472,\n",
+ " 'לד א': 473,\n",
+ " 'לד ב': 474,\n",
+ " 'לד ג': 475,\n",
+ " 'לד ד': 476,\n",
+ " 'לד א': 477,\n",
+ " 'לד ב': 478,\n",
+ " 'לד ג': 479,\n",
+ " 'לד ד': 480,\n",
+ " 'לה א': 481,\n",
+ " 'לה ד': 482,\n",
+ " 'לה א': 483,\n",
+ " 'לו א': 484,\n",
+ " 'לו ב': 485,\n",
+ " 'לו ג': 486,\n",
+ " 'לו א': 487,\n",
+ " 'לו ב': 488,\n",
+ " 'לו ג': 489,\n",
+ " 'לז א': 490,\n",
+ " 'לז ב': 491,\n",
+ " 'לז ג': 492,\n",
+ " 'לז א': 493,\n",
+ " 'לז ב': 494,\n",
+ " 'לז ג': 495,\n",
+ " 'לח א': 496,\n",
+ " 'לח ב': 497,\n",
+ " 'לח ג': 498,\n",
+ " 'לח ד': 499,\n",
+ " 'לח ה': 500,\n",
+ " 'לח ו': 501,\n",
+ " 'לח ז': 502,\n",
+ " 'לח ח': 503,\n",
+ " 'לח ט': 504,\n",
+ " 'לח י': 505,\n",
+ " 'לח יא': 506,\n",
+ " 'לח יב': 507,\n",
+ " 'לח יג': 508,\n",
+ " 'לח א': 509,\n",
+ " 'לח ב': 510,\n",
+ " 'לח ג': 511,\n",
+ " 'לח ד': 512,\n",
+ " 'לח ה': 513,\n",
+ " 'לח ז': 514,\n",
+ " 'לח ח': 515,\n",
+ " 'לח ט': 516,\n",
+ " 'לח י': 517,\n",
+ " 'לח יא': 518,\n",
+ " 'לח יב': 519,\n",
+ " 'לט א': 520,\n",
+ " 'לט ב': 521,\n",
+ " 'לט ג': 522,\n",
+ " 'לט ד': 523,\n",
+ " 'לט ה': 524,\n",
+ " 'לט ו': 525,\n",
+ " 'לט ז': 526,\n",
+ " 'לט ח': 527,\n",
+ " 'לט ט': 528,\n",
+ " 'לט י': 529,\n",
+ " 'לט א': 530,\n",
+ " 'לט ב': 531,\n",
+ " 'לט ג': 532,\n",
+ " 'לט ד': 533,\n",
+ " 'לט ו': 534,\n",
+ " 'לט ז': 535,\n",
+ " 'לט ח': 536,\n",
+ " 'לט ט': 537,\n",
+ " 'לט י': 538,\n",
+ " 'מ א': 539,\n",
+ " 'מ ב': 540,\n",
+ " 'מ ג': 541,\n",
+ " 'מ ד': 542,\n",
+ " 'מ ה': 543,\n",
+ " 'מ ו': 544,\n",
+ " 'מ ז': 545,\n",
+ " 'מ ח': 546,\n",
+ " 'מ א': 547,\n",
+ " 'מ ב': 548,\n",
+ " 'מ ג': 549,\n",
+ " 'מ ד': 550,\n",
+ " 'מ ו': 551,\n",
+ " 'מ ז': 552,\n",
+ " 'מא א': 553,\n",
+ " 'מא א': 554,\n",
+ " 'מב א': 555,\n",
+ " 'מב ב': 556,\n",
+ " 'מב ג': 557,\n",
+ " 'מב א': 558,\n",
+ " 'מב ג': 559,\n",
+ " 'מג א': 560,\n",
+ " 'מג ב': 561,\n",
+ " 'מג ג': 562,\n",
+ " 'מג ד': 563,\n",
+ " 'מג ה': 564,\n",
+ " 'מג ו': 565,\n",
+ " 'מג ז': 566,\n",
+ " 'מג ח': 567,\n",
+ " 'מג ט': 568,\n",
+ " 'מג א': 569,\n",
+ " 'מג ב': 570,\n",
+ " 'מג ג': 571,\n",
+ " 'מג ד': 572,\n",
+ " 'מג ה': 573,\n",
+ " 'מג ו': 574,\n",
+ " 'מג ז': 575,\n",
+ " 'מג ח': 576,\n",
+ " 'מד א': 577,\n",
+ " 'מד א': 578,\n",
+ " 'מה א': 579,\n",
+ " 'מה ב': 580,\n",
+ " 'מה א': 581,\n",
+ " 'מה ב': 582,\n",
+ " 'מו א': 583,\n",
+ " 'מו ב': 584,\n",
+ " 'מו ג': 585,\n",
+ " 'מו ד': 586,\n",
+ " 'מו ה': 587,\n",
+ " 'מו ו': 588,\n",
+ " 'מו ז': 589,\n",
+ " 'מו ח': 590,\n",
+ " 'מו ט': 591,\n",
+ " 'מו א': 592,\n",
+ " 'מו ב': 593,\n",
+ " 'מו ג': 594,\n",
+ " 'מו ד': 595,\n",
+ " 'מו ה': 596,\n",
+ " 'מו ו': 597,\n",
+ " 'מו ז': 598,\n",
+ " 'מו ח': 599,\n",
+ " 'מו ט': 600,\n",
+ " 'מז א': 601,\n",
+ " 'מז ב': 602,\n",
+ " 'מז ג': 603,\n",
+ " 'מז ד': 604,\n",
+ " 'מז ה': 605,\n",
+ " 'מז ו': 606,\n",
+ " 'מז ז': 607,\n",
+ " 'מז ח': 608,\n",
+ " 'מז ט': 609,\n",
+ " 'מז י': 610,\n",
+ " 'מז יא': 611,\n",
+ " 'מז יב': 612,\n",
+ " 'מז יג': 613,\n",
+ " 'מז יד': 614,\n",
+ " 'מז א': 615,\n",
+ " 'מז ג': 616,\n",
+ " 'מז ד': 617,\n",
+ " 'מז ה': 618,\n",
+ " 'מז ו': 619,\n",
+ " 'מז ז': 620,\n",
+ " 'מז ח': 621,\n",
+ " 'מז ט': 622,\n",
+ " 'מז י': 623,\n",
+ " 'מז יא': 624,\n",
+ " 'מז יב': 625,\n",
+ " 'מז יג': 626,\n",
+ " 'מז יד': 627,\n",
+ " 'מח *': 628,\n",
+ " 'מח א': 629,\n",
+ " 'מח יד': 630,\n",
+ " 'מח א': 631,\n",
+ " 'מט א': 632,\n",
+ " 'מט א': 633,\n",
+ " 'נ א': 634,\n",
+ " 'נ א': 635,\n",
+ " 'נא א': 636,\n",
+ " 'נא ב': 637,\n",
+ " 'נא ג': 638,\n",
+ " 'נא ד': 639,\n",
+ " 'נא ה': 640,\n",
+ " 'נא ו': 641,\n",
+ " 'נא ז': 642,\n",
+ " 'נא ח': 643,\n",
+ " 'נא ט': 644,\n",
+ " 'נא א': 645,\n",
+ " 'נא ב': 646,\n",
+ " 'נא ג': 647,\n",
+ " 'נא ד': 648,\n",
+ " 'נא ה': 649,\n",
+ " 'נא ו': 650,\n",
+ " 'נא ז': 651,\n",
+ " 'נא ט': 652,\n",
+ " 'נב א': 653,\n",
+ " 'נב א': 654,\n",
+ " 'נג א': 655,\n",
+ " 'נג ב': 656,\n",
+ " 'נג ג': 657,\n",
+ " 'נג ד': 658,\n",
+ " 'נג ה': 659,\n",
+ " 'נג ו': 660,\n",
+ " 'נג ז': 661,\n",
+ " 'נג ח': 662,\n",
+ " 'נג ט': 663,\n",
+ " 'נג טו': 664,\n",
+ " 'נג טז': 665,\n",
+ " 'נג י': 666,\n",
+ " 'נג יא': 667,\n",
+ " 'נג יב': 668,\n",
+ " 'נג יג': 669,\n",
+ " 'נג יד': 670,\n",
+ " 'נג יז': 671,\n",
+ " 'נג יח': 672,\n",
+ " 'נג יט': 673,\n",
+ " 'נג כ': 674,\n",
+ " 'נג כא': 675,\n",
+ " 'נג כב': 676,\n",
+ " 'נג כג': 677,\n",
+ " 'נג כד': 678,\n",
+ " 'נג כה': 679,\n",
+ " 'נג כו': 680,\n",
+ " 'נג א': 681,\n",
+ " 'נג ב': 682,\n",
+ " 'נג ג': 683,\n",
+ " 'נג ד': 684,\n",
+ " 'נג ה': 685,\n",
+ " 'נג ו': 686,\n",
+ " 'נג ט': 687,\n",
+ " 'נג טז': 688,\n",
+ " 'נג י': 689,\n",
+ " 'נג יא': 690,\n",
+ " 'נג יב': 691,\n",
+ " 'נג יד': 692,\n",
+ " 'נג יח': 693,\n",
+ " 'נג יט': 694,\n",
+ " 'נג כ': 695,\n",
+ " 'נג כא': 696,\n",
+ " 'נג כב': 697,\n",
+ " 'נג כג': 698,\n",
+ " 'נג כה': 699,\n",
+ " 'נג כו': 700,\n",
+ " 'נד א': 701,\n",
+ " 'נד ב': 702,\n",
+ " 'נד ג': 703,\n",
+ " 'נד א': 704,\n",
+ " 'נד ב': 705,\n",
+ " 'נד ג': 706,\n",
+ " 'נה א': 707,\n",
+ " 'נה ב': 708,\n",
+ " 'נה ג': 709,\n",
+ " 'נה ד': 710,\n",
+ " 'נה ה': 711,\n",
+ " 'נה ו': 712,\n",
+ " 'נה ז': 713,\n",
+ " 'נה ח': 714,\n",
+ " 'נה ט': 715,\n",
+ " 'נה טו': 716,\n",
+ " 'נה טז': 717,\n",
+ " 'נה י': 718,\n",
+ " 'נה יא': 719,\n",
+ " 'נה יב': 720,\n",
+ " 'נה יג': 721,\n",
+ " 'נה יד': 722,\n",
+ " 'נה יז': 723,\n",
+ " 'נה יח': 724,\n",
+ " 'נה יט': 725,\n",
+ " 'נה כ': 726,\n",
+ " 'נה כא': 727,\n",
+ " 'נה כב': 728,\n",
+ " 'נה א': 729,\n",
+ " 'נה ב': 730,\n",
+ " 'נה ג': 731,\n",
+ " 'נה ד': 732,\n",
+ " 'נה ה': 733,\n",
+ " 'נה ו': 734,\n",
+ " 'נה ז': 735,\n",
+ " 'נה טז': 736,\n",
+ " 'נה י': 737,\n",
+ " 'נה יג': 738,\n",
+ " 'נה יד': 739,\n",
+ " 'נה יט': 740,\n",
+ " 'נה כ': 741,\n",
+ " 'נה כא': 742,\n",
+ " 'נה כב': 743,\n",
+ " 'נו א': 744,\n",
+ " 'נו ב': 745,\n",
+ " 'נו ג': 746,\n",
+ " 'נו ד': 747,\n",
+ " 'נו ה': 748,\n",
+ " 'נו א': 749,\n",
+ " 'נו ב': 750,\n",
+ " 'נו ג': 751,\n",
+ " 'נז א': 752,\n",
+ " 'נז ב': 753,\n",
+ " 'נז א': 754,\n",
+ " 'נח א': 755,\n",
+ " 'נח ב': 756,\n",
+ " 'נח ג': 757,\n",
+ " 'נח ד': 758,\n",
+ " 'נח ה': 759,\n",
+ " 'נח ו': 760,\n",
+ " 'נח ז': 761,\n",
+ " 'נח א': 762,\n",
+ " 'נח ב': 763,\n",
+ " 'נח ג': 764,\n",
+ " 'נח ד': 765,\n",
+ " 'נח ה': 766,\n",
+ " 'נח ו': 767,\n",
+ " 'נח ז': 768,\n",
+ " 'נט א': 769,\n",
+ " 'נט ב': 770,\n",
+ " 'נט ג': 771,\n",
+ " 'נט ד': 772,\n",
+ " 'נט ה': 773,\n",
+ " 'נט א': 774,\n",
+ " 'נט ב': 775,\n",
+ " 'נט ג': 776,\n",
+ " 'נט ד': 777,\n",
+ " 'נט ה': 778,\n",
+ " 'ס א': 779,\n",
+ " 'ס ב': 780,\n",
+ " 'ס ג': 781,\n",
+ " 'ס ד': 782,\n",
+ " 'ס ה': 783,\n",
+ " 'ס א': 784,\n",
+ " 'ס ב': 785,\n",
+ " 'ס ג': 786,\n",
+ " 'ס ד': 787,\n",
+ " 'ס ה': 788,\n",
+ " 'סא א': 789,\n",
+ " 'סא ב': 790,\n",
+ " 'סא ג': 791,\n",
+ " 'סא ד': 792,\n",
+ " 'סא ה': 793,\n",
+ " 'סא ו': 794,\n",
+ " 'סא ז': 795,\n",
+ " 'סא ח': 796,\n",
+ " 'סא ט': 797,\n",
+ " 'סא טו': 798,\n",
+ " 'סא טז': 799,\n",
+ " 'סא י': 800,\n",
+ " 'סא יא': 801,\n",
+ " 'סא יב': 802,\n",
+ " 'סא יג': 803,\n",
+ " 'סא יד': 804,\n",
+ " 'סא יז': 805,\n",
+ " 'סא יח': 806,\n",
+ " 'סא יט': 807,\n",
+ " 'סא כ': 808,\n",
+ " 'סא כא': 809,\n",
+ " 'סא כב': 810,\n",
+ " 'סא כג': 811,\n",
+ " 'סא כד': 812,\n",
+ " 'סא כה': 813,\n",
+ " 'סא כו': 814,\n",
+ " 'סא א': 815,\n",
+ " 'סא ג': 816,\n",
+ " 'סא ה': 817,\n",
+ " 'סא ו': 818,\n",
+ " 'סא ז': 819,\n",
+ " 'סא ט': 820,\n",
+ " 'סא י': 821,\n",
+ " 'סא יא': 822,\n",
+ " 'סא יב': 823,\n",
+ " 'סא יג': 824,\n",
+ " 'סא יד': 825,\n",
+ " 'סא כ': 826,\n",
+ " 'סא כא': 827,\n",
+ " 'סא כג': 828,\n",
+ " 'סא כד': 829,\n",
+ " 'סא כה': 830,\n",
+ " 'סא כו': 831,\n",
+ " 'סב א': 832,\n",
+ " 'סב ב': 833,\n",
+ " 'סב ג': 834,\n",
+ " 'סב ד': 835,\n",
+ " 'סב ה': 836,\n",
+ " 'סב א': 837,\n",
+ " 'סב ב': 838,\n",
+ " 'סב ד': 839,\n",
+ " 'סב ה': 840,\n",
+ " 'סג א': 841,\n",
+ " 'סג ב': 842,\n",
+ " 'סג ג': 843,\n",
+ " 'סג ד': 844,\n",
+ " 'סג ה': 845,\n",
+ " 'סג ו': 846,\n",
+ " 'סג ז': 847,\n",
+ " 'סג ח': 848,\n",
+ " 'סג ט': 849,\n",
+ " 'סג א': 850,\n",
+ " 'סג ב': 851,\n",
+ " 'סג ג': 852,\n",
+ " 'סג ד': 853,\n",
+ " 'סג ה': 854,\n",
+ " 'סג ו': 855,\n",
+ " 'סד א': 856,\n",
+ " 'סד ב': 857,\n",
+ " 'סד ג': 858,\n",
+ " 'סד ד': 859,\n",
+ " 'סד ב': 860,\n",
+ " 'סד ד': 861,\n",
+ " 'סה א': 862,\n",
+ " 'סה ב': 863,\n",
+ " 'סה ג': 864,\n",
+ " 'סה א': 865,\n",
+ " 'סה ב': 866,\n",
+ " 'סו א': 867,\n",
+ " 'סו ב': 868,\n",
+ " 'סו ג': 869,\n",
+ " 'סו ד': 870,\n",
+ " 'סו ה': 871,\n",
+ " 'סו ו': 872,\n",
+ " 'סו ז': 873,\n",
+ " 'סו ח': 874,\n",
+ " 'סו ט': 875,\n",
+ " 'סו י': 876,\n",
+ " 'סו א': 877,\n",
+ " 'סו ב': 878,\n",
+ " 'סו ג': 879,\n",
+ " 'סו ד': 880,\n",
+ " 'סו ה': 881,\n",
+ " 'סו ז': 882,\n",
+ " 'סו ח': 883,\n",
+ " 'סו י': 884,\n",
+ " 'סז א': 885,\n",
+ " 'סז א': 886,\n",
+ " 'סח א': 887,\n",
+ " 'סח א': 888,\n",
+ " 'סט א': 889,\n",
+ " 'סט ב': 890,\n",
+ " 'סט א': 891,\n",
+ " 'סט ב': 892,\n",
+ " 'ע א': 893,\n",
+ " 'ע ב': 894,\n",
+ " 'ע ג': 895,\n",
+ " 'ע ד': 896,\n",
+ " 'ע ה': 897,\n",
+ " 'ע א': 898,\n",
+ " 'ע ג': 899,\n",
+ " 'ע ד': 900,\n",
+ " 'ע ה': 901,\n",
+ " 'עא א': 902,\n",
+ " 'עא ב': 903,\n",
+ " 'עא ג': 904,\n",
+ " 'עא ד': 905,\n",
+ " 'עא ה': 906,\n",
+ " 'עא ו': 907,\n",
+ " 'עא ז': 908,\n",
+ " 'עא א': 909,\n",
+ " 'עא ב': 910,\n",
+ " 'עא ז': 911,\n",
+ " 'עב א': 912,\n",
+ " 'עב ב': 913,\n",
+ " 'עב ג': 914,\n",
+ " 'עב ד': 915,\n",
+ " 'עב ה': 916,\n",
+ " 'עב א': 917,\n",
+ " 'עב ב': 918,\n",
+ " 'עב ג': 919,\n",
+ " 'עב ד': 920,\n",
+ " 'עב ה': 921,\n",
+ " 'עג א': 922,\n",
+ " 'עג ב': 923,\n",
+ " 'עג ג': 924,\n",
+ " 'עג ד': 925,\n",
+ " 'עג א': 926,\n",
+ " 'עג ב': 927,\n",
+ " 'עג ג': 928,\n",
+ " 'עג ד': 929,\n",
+ " 'עד א': 930,\n",
+ " 'עד ב': 931,\n",
+ " 'עד ג': 932,\n",
+ " 'עד ד': 933,\n",
+ " 'עד ה': 934,\n",
+ " 'עד ו': 935,\n",
+ " 'עד א': 936,\n",
+ " 'עד ב': 937,\n",
+ " 'עד ג': 938,\n",
+ " 'עד ד': 939,\n",
+ " 'עד ה': 940,\n",
+ " 'עד ו': 941,\n",
+ " 'עה א': 942,\n",
+ " 'עה ב': 943,\n",
+ " 'עה ג': 944,\n",
+ " 'עה ד': 945,\n",
+ " 'עה ה': 946,\n",
+ " 'עה ו': 947,\n",
+ " 'עה א': 948,\n",
+ " 'עה ב': 949,\n",
+ " 'עה ג': 950,\n",
+ " 'עה ד': 951,\n",
+ " 'עה ו': 952,\n",
+ " 'עו א': 953,\n",
+ " 'עו ב': 954,\n",
+ " 'עו ג': 955,\n",
+ " 'עו ד': 956,\n",
+ " 'עו ה': 957,\n",
+ " 'עו ו': 958,\n",
+ " 'עו ז': 959,\n",
+ " 'עו ח': 960,\n",
+ " 'עו ב': 961,\n",
+ " 'עו ג': 962,\n",
+ " 'עו ד': 963,\n",
+ " 'עו ה': 964,\n",
+ " 'עו ו': 965,\n",
+ " 'עו ז': 966,\n",
+ " 'עו ח': 967,\n",
+ " 'עז א': 968,\n",
+ " 'עז ב': 969,\n",
+ " 'עז ב': 970,\n",
+ " 'עח א': 971,\n",
+ " 'עח א': 972,\n",
+ " 'עט א': 973,\n",
+ " 'עט ב': 974,\n",
+ " 'עט ג': 975,\n",
+ " 'עט ד': 976,\n",
+ " 'עט ה': 977,\n",
+ " 'עט ו': 978,\n",
+ " 'עט ז': 979,\n",
+ " 'עט ח': 980,\n",
+ " 'עט ט': 981,\n",
+ " 'עט א': 982,\n",
+ " 'עט ב': 983,\n",
+ " 'עט ג': 984,\n",
+ " 'עט ה': 985,\n",
+ " 'עט ו': 986,\n",
+ " 'עט ז': 987,\n",
+ " 'עט ט': 988,\n",
+ " 'פ א': 989,\n",
+ " 'פ א': 990,\n",
+ " 'פא א': 991,\n",
+ " 'פא ב': 992,\n",
+ " 'פא א': 993,\n",
+ " 'פא ב': 994,\n",
+ " 'פב א': 995,\n",
+ " 'פב ב': 996,\n",
+ " 'פב א': 997,\n",
+ " 'פב ב': 998,\n",
+ " 'פג א': 999,\n",
+ " ...}"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "label2id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:00.483347Z",
+ "iopub.status.busy": "2023-12-05T04:05:00.483108Z",
+ "iopub.status.idle": "2023-12-05T04:05:02.948882Z",
+ "shell.execute_reply": "2023-12-05T04:05:02.947805Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:00.483323Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
+ "\n",
+ "# Load the checkpoint for sequence classification. The label count is taken from\n",
+ "# `simanim`, and both label maps are handed to from_pretrained alongside it.\n",
+ "model = AutoModelForSequenceClassification.from_pretrained(\n",
+ "    'sivan22/halacha-siman-seif-classifier',\n",
+ "    num_labels=len(simanim),\n",
+ "    id2label=id2label,\n",
+ "    label2id=label2id,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:02.953834Z",
+ "iopub.status.busy": "2023-12-05T04:05:02.953576Z",
+ "iopub.status.idle": "2023-12-05T04:05:05.676603Z",
+ "shell.execute_reply": "2023-12-05T04:05:05.675305Z",
+ "shell.execute_reply.started": "2023-12-05T04:05:02.953807Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import wandb\n",
+ "\n",
+ "# Credentials are deliberately not stored in the notebook. With no explicit key,\n",
+ "# wandb.login() uses the WANDB_API_KEY environment variable or the saved ~/.netrc\n",
+ "# entry, and prompts for the key when neither is available.\n",
+ "wandb.login()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-12-05T04:05:05.681680Z",
+ "iopub.status.busy": "2023-12-05T04:05:05.681446Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33msivan-ratson\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "To disable this warning, you can either:\n",
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "wandb version 0.16.0 is available! To upgrade, please run:\n",
+ " $ pip install wandb --upgrade"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Tracking run with wandb version 0.13.4"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Run data is saved locally in /notebooks/wandb/run-20231205_040508-3ptlrafb
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Syncing run graceful-wood-31 to Weights & Biases (docs)
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 295000 bytes\n",
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 295000 bytes\n",
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [ 6430/35955 1:15:24 < 5:46:20, 1.42 it/s, Epoch 2.68/15]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Epoch | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ " Accuracy | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 5.281300 | \n",
+ " 4.988975 | \n",
+ " 0.291716 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4.603500 | \n",
+ " 4.606649 | \n",
+ " 0.330674 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "from transformers.trainer_utils import get_last_checkpoint\n",
+ "\n",
+ "# Single source of truth for the checkpoint/output folder used below.\n",
+ "OUTPUT_DIR = \"halacha-siman-seif-classifier\"\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir=OUTPUT_DIR,\n",
+ "    learning_rate=2e-5,\n",
+ "    per_device_train_batch_size=16,\n",
+ "    per_device_eval_batch_size=16,\n",
+ "    num_train_epochs=15,\n",
+ "    weight_decay=0.01,\n",
+ "    # Evaluation and save strategies must match for load_best_model_at_end.\n",
+ "    evaluation_strategy=\"epoch\",\n",
+ "    save_strategy=\"epoch\",\n",
+ "    load_best_model_at_end=True,\n",
+ "    push_to_hub=True,\n",
+ ")\n",
+ "\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=tokenized_dataset[\"train\"],\n",
+ "    eval_dataset=tokenized_dataset[\"test\"],\n",
+ "    tokenizer=tokenizer,\n",
+ "    data_collator=data_collator,\n",
+ "    compute_metrics=compute_metrics,\n",
+ ")\n",
+ "\n",
+ "# `resume_from_checkpoint` set on TrainingArguments is not acted on by Trainer;\n",
+ "# resuming only happens when a checkpoint is passed to `train()`.\n",
+ "# Use the newest `checkpoint-*` folder in OUTPUT_DIR, or start a fresh run (None)\n",
+ "# when the folder does not exist or holds no checkpoint yet.\n",
+ "last_checkpoint = get_last_checkpoint(OUTPUT_DIR) if os.path.isdir(OUTPUT_DIR) else None\n",
+ "\n",
+ "trainer.train(resume_from_checkpoint=last_checkpoint)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import pipeline\n",
+ "\n",
+ "# Build an inference pipeline from the fine-tuned model published on the Hugging Face Hub.\n",
+ "# No task is passed, so pipeline() has to determine it from the model itself.\n",
+ "hub_model_id = \"sivan22/halacha-siman-seif-classifier\"\n",
+ "classifier = pipeline(model=hub_model_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Smoke-test the classifier with a sample Hebrew question\n",
+ "# (roughly: \"how many times does one wash hands before eating bread\").\n",
+ "text = \"כמה פעמים נוטלים ידים לפני אכילת לחם\"\n",
+ "classifier(text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}