{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "e68b6e6997844bf788a057f9c7feedfb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_295e4080ccd64e48806a36b83e50ddfa", "IPY_MODEL_c4025862f06b412cb99165b67ad7daae", "IPY_MODEL_5ac369dab692489cb13cdb664c47fd96" ], "layout": "IPY_MODEL_434aa0b7bd76440d9b9b64d8b53133d3" } }, "295e4080ccd64e48806a36b83e50ddfa": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9e2a1fea814f408ebb4d15db83b1130b", "placeholder": "", "style": "IPY_MODEL_4a2f178864244d68bd915ee57379251d", "value": "Map: 100%" } }, "c4025862f06b412cb99165b67ad7daae": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_7125f94d482a46999fd4dd3be1b3e87e", "max": 1148, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_96486cdef9714482a4ffa2aca1b3628b", "value": 1148 } }, "5ac369dab692489cb13cdb664c47fd96": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2364eb3ce5b345788902c5f9d316a00a", "placeholder": "", "style": "IPY_MODEL_52f799ea10d4403cb18e33ba80d739d3", "value": " 1148/1148 [00:01<00:00, 781.70 examples/s]" } }, "434aa0b7bd76440d9b9b64d8b53133d3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, 
"width": null } }, "9e2a1fea814f408ebb4d15db83b1130b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a2f178864244d68bd915ee57379251d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7125f94d482a46999fd4dd3be1b3e87e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "96486cdef9714482a4ffa2aca1b3628b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "2364eb3ce5b345788902c5f9d316a00a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "52f799ea10d4403cb18e33ba80d739d3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3e18acb6f1504f4dace716a96e8d90f4": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_953e7d76140e4ed2ade688ccd5467a75", "IPY_MODEL_3a70d75b4eb949598e7cb9430acfcf81", "IPY_MODEL_54719990ff1f40cb8fed06badb378d01" ], "layout": "IPY_MODEL_5d1be2eaa2c143bbbc35f7d0f33f64de" } }, "953e7d76140e4ed2ade688ccd5467a75": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_002c9d35efa54fccb875a08e7059997f", "placeholder": "", "style": "IPY_MODEL_21dd8d7b7e5a4e27922ff1e3bec7745a", 
"value": "Map: 100%" } }, "3a70d75b4eb949598e7cb9430acfcf81": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_48abc963896a404886fbcf75b0b19bb9", "max": 287, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_87e3a17419334bf8b2448a8914f9d721", "value": 287 } }, "54719990ff1f40cb8fed06badb378d01": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f8303a91b4084791971947ca45c6b459", "placeholder": "", "style": "IPY_MODEL_a878599cc49347a896c793f3c45914e3", "value": " 287/287 [00:00<00:00, 556.23 examples/s]" } }, "5d1be2eaa2c143bbbc35f7d0f33f64de": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "002c9d35efa54fccb875a08e7059997f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "21dd8d7b7e5a4e27922ff1e3bec7745a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "48abc963896a404886fbcf75b0b19bb9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "87e3a17419334bf8b2448a8914f9d721": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f8303a91b4084791971947ca45c6b459": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a878599cc49347a896c793f3c45914e3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SRajt-tUH3ms", "outputId": "f6077695-1508-4b60-b33a-7a29f37b4c75" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n", "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.14.4)\n", "Requirement already satisfied: evaluate in /usr/local/lib/python3.10/dist-packages (0.4.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", "Requirement already satisfied: 
huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.3.0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n", "Requirement already satisfied: responses<0.19 in 
/usr/local/lib/python3.10/dist-packages (from evaluate) (0.18.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n" ] } ], "source": [ "! 
pip install transformers datasets evaluate" ] }, { "cell_type": "code", "source": [ "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")" ], "metadata": { "id": "rjE6lHHJJdyv" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Fixed seed so the train/test split is the same on every Restart & Run All.\n",
"SEED = 42\n",
"\n",
"data = pd.read_csv(\"ielts_writing_dataset_new.csv\")\n",
"\n",
"# Collapse IELTS band scores into three classes using right-closed bins:\n",
"#   band <= 5.0        -> 0\n",
"#   5.0 < band <= 7.5  -> 1\n",
"#   band > 7.5         -> 2\n",
"# Binning by range covers every band. A per-value replace() chain that omits a\n",
"# band (e.g. 2 or 2.5) lets astype(int) turn it into an unrelated class id.\n",
"# Missing labels are not handled here; drop or fill them before this step.\n",
"data.label = pd.cut(\n",
"    data.label,\n",
"    bins=[float(\"-inf\"), 5.0, 7.5, float(\"inf\")],\n",
"    labels=[0, 1, 2],\n",
").astype(int)\n",
"\n",
"train, test = train_test_split(data, test_size=0.2, random_state=SEED)\n"
], "metadata": { "id": "GpD5w5t2JihL" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "source": [ "data[:10]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "Cos-ypQ7n7d9", "outputId": "92caed9a-43e5-4a28-adf3-1727e3a15357" }, "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " label text\n", "0 1 Between 1995 and 2010, a study was conducted r...\n", "1 1 Poverty represents a worldwide crisis. 
It is t...\n", "2 0 The left chart shows the population change hap...\n", "3 1 Human beings are facing many challenges nowada...\n", "4 1 Information about the thousands of visits from...\n", "5 1 Whether countries should only invest facilitie...\n", "6 1 This graph depicts the changes in tourists vis...\n", "7 1 Sports is an essential part to most of us , so...\n", "8 2 The line graph illustrates the number of overs...\n", "9 2 International sports events require the most w..." ], "text/html": [ "\n", "\n", "
\n", " | label | \n", "text | \n", "
---|---|---|
0 | \n", "1 | \n", "Between 1995 and 2010, a study was conducted r... | \n", "
1 | \n", "1 | \n", "Poverty represents a worldwide crisis. It is t... | \n", "
2 | \n", "0 | \n", "The left chart shows the population change hap... | \n", "
3 | \n", "1 | \n", "Human beings are facing many challenges nowada... | \n", "
4 | \n", "1 | \n", "Information about the thousands of visits from... | \n", "
5 | \n", "1 | \n", "Whether countries should only invest facilitie... | \n", "
6 | \n", "1 | \n", "This graph depicts the changes in tourists vis... | \n", "
7 | \n", "1 | \n", "Sports is an essential part to most of us , so... | \n", "
8 | \n", "2 | \n", "The line graph illustrates the number of overs... | \n", "
9 | \n", "2 | \n", "International sports events require the most w... | \n", "
Epoch | \n", "Training Loss | \n", "Validation Loss | \n", "Accuracy | \n", "
---|---|---|---|
1 | \n", "No log | \n", "0.601437 | \n", "0.752613 | \n", "
2 | \n", "No log | \n", "0.444218 | \n", "0.860627 | \n", "
3 | \n", "No log | \n", "0.510611 | \n", "0.815331 | \n", "
4 | \n", "No log | \n", "0.723215 | \n", "0.766551 | \n", "
5 | \n", "No log | \n", "0.556284 | \n", "0.850174 | \n", "
6 | \n", "No log | \n", "0.783423 | \n", "0.794425 | \n", "
7 | \n", "0.275800 | \n", "0.735923 | \n", "0.850174 | \n", "
8 | \n", "0.275800 | \n", "0.654791 | \n", "0.878049 | \n", "
9 | \n", "0.275800 | \n", "0.633503 | \n", "0.888502 | \n", "
10 | \n", "0.275800 | \n", "1.105006 | \n", "0.783972 | \n", "
11 | \n", "0.275800 | \n", "0.710119 | \n", "0.878049 | \n", "
12 | \n", "0.275800 | \n", "0.792314 | \n", "0.839721 | \n", "
13 | \n", "0.275800 | \n", "0.863435 | \n", "0.843206 | \n", "
14 | \n", "0.018500 | \n", "0.834555 | \n", "0.843206 | \n", "
15 | \n", "0.018500 | \n", "0.864810 | \n", "0.832753 | \n", "
" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=1080, training_loss=0.13700703542541576, metrics={'train_runtime': 1752.9066, 'train_samples_per_second': 9.824, 'train_steps_per_second': 0.616, 'total_flos': 4194210824632584.0, 'train_loss': 0.13700703542541576, 'epoch': 15.0})" ] }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "code", "source": [ "!zip -r /content/checkpoint.zip /content/essayl0/checkpoint-1080/" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "s6wG4purBmfX", "outputId": "3363587c-a6e3-4a40-db80-73d6eaf26cf7" }, "execution_count": 18, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " adding: content/essayl0/checkpoint-1080/ (stored 0%)\n", " adding: content/essayl0/checkpoint-1080/special_tokens_map.json (deflated 42%)\n", " adding: content/essayl0/checkpoint-1080/rng_state.pth (deflated 28%)\n", " adding: content/essayl0/checkpoint-1080/vocab.txt (deflated 53%)\n", " adding: content/essayl0/checkpoint-1080/tokenizer.json (deflated 71%)\n", " adding: content/essayl0/checkpoint-1080/config.json (deflated 50%)\n", " adding: content/essayl0/checkpoint-1080/trainer_state.json (deflated 78%)\n", " adding: content/essayl0/checkpoint-1080/pytorch_model.bin (deflated 7%)\n", " adding: content/essayl0/checkpoint-1080/optimizer.pt (deflated 21%)\n", " adding: content/essayl0/checkpoint-1080/training_args.bin (deflated 48%)\n", " adding: content/essayl0/checkpoint-1080/tokenizer_config.json (deflated 43%)\n", " adding: content/essayl0/checkpoint-1080/scheduler.pt (deflated 49%)\n" ] } ] } ] }