{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "V100"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "b87c718ffa014b698f5302ac4b85cfbc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_cb97a7229b5b4f459667c44142048584",
              "IPY_MODEL_2366c60a468a4bdaa4c20f9b0d379e61",
              "IPY_MODEL_a8c724b552f84e999ff1e3fe88eb0f1d"
            ],
            "layout": "IPY_MODEL_cf5e4a8ccf0343d0be2d081ba55b23ba"
          }
        },
        "cb97a7229b5b4f459667c44142048584": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7d52639c06804f758bcc380a5adb39fc",
            "placeholder": "​",
            "style": "IPY_MODEL_360ea7f698c14b3484830110aaff859d",
            "value": "spiece.model: 100%"
          }
        },
        "2366c60a468a4bdaa4c20f9b0d379e61": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8d3208bae8f043d99be8298afb4016af",
            "max": 791656,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_ef2b8fbfb131431484a3c95f3d13de67",
            "value": 791656
          }
        },
        "a8c724b552f84e999ff1e3fe88eb0f1d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f4ade4961f154ee2934160a530806093",
            "placeholder": "​",
            "style": "IPY_MODEL_b8f34d5bd4f54428bbd0a15bfc7ca617",
            "value": " 792k/792k [00:00&lt;00:00, 2.45MB/s]"
          }
        },
        "cf5e4a8ccf0343d0be2d081ba55b23ba": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7d52639c06804f758bcc380a5adb39fc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "360ea7f698c14b3484830110aaff859d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8d3208bae8f043d99be8298afb4016af": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef2b8fbfb131431484a3c95f3d13de67": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f4ade4961f154ee2934160a530806093": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b8f34d5bd4f54428bbd0a15bfc7ca617": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "766a501c262b4ad88b705c78e01da38b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9e90f5eda3454b83a4fb3ceac3dca8f7",
              "IPY_MODEL_6f24fb85ada64b52acc31ad0295ef427",
              "IPY_MODEL_ad9ac6877f2043e59fb0916928433b90"
            ],
            "layout": "IPY_MODEL_3099f67fc7134681b93ffa6d4453fd9e"
          }
        },
        "9e90f5eda3454b83a4fb3ceac3dca8f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cbd5aaff0e0540b0bc127a88369b4852",
            "placeholder": "​",
            "style": "IPY_MODEL_989eeccc4a7148fcbb17ff753ce1bb82",
            "value": "tokenizer.json: 100%"
          }
        },
        "6f24fb85ada64b52acc31ad0295ef427": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c651cbf7cc1143e3b91621646c2d9e4c",
            "max": 1389353,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_a6140294e36a40a2bcd0ff4de8452d66",
            "value": 1389353
          }
        },
        "ad9ac6877f2043e59fb0916928433b90": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9dd6162cab974aa0b3c3ce0480a507e8",
            "placeholder": "​",
            "style": "IPY_MODEL_9b9b8b2cf24341aa8a93c28f48f7026f",
            "value": " 1.39M/1.39M [00:00&lt;00:00, 4.23MB/s]"
          }
        },
        "3099f67fc7134681b93ffa6d4453fd9e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cbd5aaff0e0540b0bc127a88369b4852": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "989eeccc4a7148fcbb17ff753ce1bb82": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c651cbf7cc1143e3b91621646c2d9e4c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a6140294e36a40a2bcd0ff4de8452d66": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "9dd6162cab974aa0b3c3ce0480a507e8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9b9b8b2cf24341aa8a93c28f48f7026f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "470127267c5f494990a1b1045d14ea78": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_4b58e9b647754187a1da8e4aa76fba16",
              "IPY_MODEL_ba5f5aa83c314e7a855a52f933fd036d",
              "IPY_MODEL_8dfd49369fb04e1fa4eba898d32ef9e0"
            ],
            "layout": "IPY_MODEL_b689524c5fb649189544fe711b52dfad"
          }
        },
        "4b58e9b647754187a1da8e4aa76fba16": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e4b1c27fad4e439eafb92c8cdaa6805d",
            "placeholder": "​",
            "style": "IPY_MODEL_2f9a191d5de64bd0a66c9b1bb3056cff",
            "value": "config.json: 100%"
          }
        },
        "ba5f5aa83c314e7a855a52f933fd036d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2b67c83f83f841cc868698e4de992a5e",
            "max": 1208,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_0c210297cc234a8f95f13ab5d5a27017",
            "value": 1208
          }
        },
        "8dfd49369fb04e1fa4eba898d32ef9e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7b7acc7c41b24c4da4bd010ce3f26672",
            "placeholder": "​",
            "style": "IPY_MODEL_2115a85109ab46e0bd6f22933a09c2c9",
            "value": " 1.21k/1.21k [00:00&lt;00:00, 90.9kB/s]"
          }
        },
        "b689524c5fb649189544fe711b52dfad": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e4b1c27fad4e439eafb92c8cdaa6805d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2f9a191d5de64bd0a66c9b1bb3056cff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2b67c83f83f841cc868698e4de992a5e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0c210297cc234a8f95f13ab5d5a27017": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "7b7acc7c41b24c4da4bd010ce3f26672": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2115a85109ab46e0bd6f22933a09c2c9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "934d2bd4c7f14719897a632189585bf2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_322b61fabafd46289cd92dc0d064b50a",
              "IPY_MODEL_a6e709590ed74314b91990c36f1c5f03",
              "IPY_MODEL_f052fe4d2c3644f294512dbeddff515d"
            ],
            "layout": "IPY_MODEL_05077183f5184805b05f32175e30c645"
          }
        },
        "322b61fabafd46289cd92dc0d064b50a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_dfe84194bc90499d97b444b7f57b1091",
            "placeholder": "​",
            "style": "IPY_MODEL_27b04743f9ca4bc3be5cc693252a63ba",
            "value": "model.safetensors: 100%"
          }
        },
        "a6e709590ed74314b91990c36f1c5f03": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e82307c2afe04518ba58109eaf460b89",
            "max": 891646390,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_63512e102325459cbf8659efb7e75fd4",
            "value": 891646390
          }
        },
        "f052fe4d2c3644f294512dbeddff515d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_75b5501fcd4e4e809eb1a9cb21fc0a0c",
            "placeholder": "​",
            "style": "IPY_MODEL_3945b5e0289542e7b330d5a81e93623b",
            "value": " 892M/892M [00:02&lt;00:00, 390MB/s]"
          }
        },
        "05077183f5184805b05f32175e30c645": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "dfe84194bc90499d97b444b7f57b1091": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "27b04743f9ca4bc3be5cc693252a63ba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e82307c2afe04518ba58109eaf460b89": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "63512e102325459cbf8659efb7e75fd4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "75b5501fcd4e4e809eb1a9cb21fc0a0c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3945b5e0289542e7b330d5a81e93623b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b6aa0aa60397484fa7a4824859a3badd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c89d0d82fc9440bd996a27f8ae8e5411",
              "IPY_MODEL_abfae4595dd64f23bf73aa765c3b10f8",
              "IPY_MODEL_e23fe9ac4f6c4115aa976a0925a1b4b9"
            ],
            "layout": "IPY_MODEL_c1b8e20ddbdd4e189b93d2c0e66ac7ba"
          }
        },
        "c89d0d82fc9440bd996a27f8ae8e5411": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b4d6e7a2455947e19a9ce0914dfdba9c",
            "placeholder": "​",
            "style": "IPY_MODEL_ad8d3584468f4dfd9e2269f9fafc52e4",
            "value": "generation_config.json: 100%"
          }
        },
        "abfae4595dd64f23bf73aa765c3b10f8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_30c1d242f2af438995d7bb175d80e52f",
            "max": 147,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_77c83e27bde54a679fc785d86dbc6a0c",
            "value": 147
          }
        },
        "e23fe9ac4f6c4115aa976a0925a1b4b9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_75e56ac1e91a4d50ad4f33f9e74d413a",
            "placeholder": "​",
            "style": "IPY_MODEL_d6725d7e91d54658a9a495b071339bd5",
            "value": " 147/147 [00:00&lt;00:00, 10.8kB/s]"
          }
        },
        "c1b8e20ddbdd4e189b93d2c0e66ac7ba": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b4d6e7a2455947e19a9ce0914dfdba9c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ad8d3584468f4dfd9e2269f9fafc52e4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "30c1d242f2af438995d7bb175d80e52f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "77c83e27bde54a679fc785d86dbc6a0c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "75e56ac1e91a4d50ad4f33f9e74d413a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d6725d7e91d54658a9a495b071339bd5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f7bc8db3de294fe2ad41a7f94322925b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9ecbba7b92884960a1021f8d2c707b3c",
              "IPY_MODEL_9b948078a87c4850a5520d5aaf7485d6",
              "IPY_MODEL_3c10a4ec4d0a4408b892172f9d9c07d7"
            ],
            "layout": "IPY_MODEL_7d5680b27a6c4f23bdec661851b3bb08"
          }
        },
        "9ecbba7b92884960a1021f8d2c707b3c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_79f075494ff14f0ba292b5d95ac88393",
            "placeholder": "​",
            "style": "IPY_MODEL_1ce3a00bf77146b2a45b013b3bdfa8aa",
            "value": "Downloading builder script: 100%"
          }
        },
        "9b948078a87c4850a5520d5aaf7485d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cedcb09b9ee8429b99fe2d8725965ae7",
            "max": 5348,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2831f41d9bda480f9c671b56ce24329a",
            "value": 5348
          }
        },
        "3c10a4ec4d0a4408b892172f9d9c07d7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_265a75becb30411d9d3034c13627710e",
            "placeholder": "​",
            "style": "IPY_MODEL_9ecb1f1f0dbf4a7e99822dd69f6cfe4f",
            "value": " 5.35k/5.35k [00:00&lt;00:00, 434kB/s]"
          }
        },
        "7d5680b27a6c4f23bdec661851b3bb08": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "79f075494ff14f0ba292b5d95ac88393": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1ce3a00bf77146b2a45b013b3bdfa8aa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "cedcb09b9ee8429b99fe2d8725965ae7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2831f41d9bda480f9c671b56ce24329a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "265a75becb30411d9d3034c13627710e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9ecb1f1f0dbf4a7e99822dd69f6cfe4f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2cf18a3d5d074d659676ad3fb93d9dba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_4f49a2e2ba294c7897c2ff3207e14f50",
              "IPY_MODEL_2e377e0bdbe740448670e0ea2c5f6d36",
              "IPY_MODEL_a0a2c4aa7d2a41f1a3042749a37f6150"
            ],
            "layout": "IPY_MODEL_d74459ab45c14a338a36cb1ac3c62249"
          }
        },
        "4f49a2e2ba294c7897c2ff3207e14f50": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_add98f7d7cf74ef8a746bb9ce0e17695",
            "placeholder": "​",
            "style": "IPY_MODEL_274a4f7529174c358aa90e27547671bc",
            "value": "Downloading readme: 100%"
          }
        },
        "2e377e0bdbe740448670e0ea2c5f6d36": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_936c4bafb94842db80722e3f4bea4ca2",
            "max": 8269,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_274a8fafe1a8411a888edd65e5574014",
            "value": 8269
          }
        },
        "a0a2c4aa7d2a41f1a3042749a37f6150": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c3fc4b71b6514b0c9f40ed389bab512d",
            "placeholder": "​",
            "style": "IPY_MODEL_7d41c294ca314b19b493fd2c5e935b48",
            "value": " 8.27k/8.27k [00:00&lt;00:00, 657kB/s]"
          }
        },
        "d74459ab45c14a338a36cb1ac3c62249": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "add98f7d7cf74ef8a746bb9ce0e17695": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "274a4f7529174c358aa90e27547671bc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "936c4bafb94842db80722e3f4bea4ca2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "274a8fafe1a8411a888edd65e5574014": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c3fc4b71b6514b0c9f40ed389bab512d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7d41c294ca314b19b493fd2c5e935b48": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6ab45d023ab545be9a8a8fe2283d8f00": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_49c9b880f13242d592092e5bd8cc1acb",
              "IPY_MODEL_9c91c38f13cf4c4d8db35f12eff9ed1a",
              "IPY_MODEL_fdd89353c9d040e3838e773a6bec7a79"
            ],
            "layout": "IPY_MODEL_a6b08da8e69d466089418c3ef8aeb97f"
          }
        },
        "49c9b880f13242d592092e5bd8cc1acb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fa39adfc238143859d537ccd7848d99e",
            "placeholder": "​",
            "style": "IPY_MODEL_4d03cf5626ae45d3b1f6505a16a1cc4c",
            "value": "Downloading data: 100%"
          }
        },
        "9c91c38f13cf4c4d8db35f12eff9ed1a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8fcf28a815e2454e8d400a00f9518d84",
            "max": 3624420843,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_1b86816ba0de4c99b1549c6f89203939",
            "value": 3624420843
          }
        },
        "fdd89353c9d040e3838e773a6bec7a79": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_906ce956fa3241a5b66d30be1de8f274",
            "placeholder": "​",
            "style": "IPY_MODEL_150fdaad29084b36b4c0bbb8bc332c0a",
            "value": " 3.62G/3.62G [01:54&lt;00:00, 33.5MB/s]"
          }
        },
        "a6b08da8e69d466089418c3ef8aeb97f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fa39adfc238143859d537ccd7848d99e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4d03cf5626ae45d3b1f6505a16a1cc4c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8fcf28a815e2454e8d400a00f9518d84": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1b86816ba0de4c99b1549c6f89203939": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "906ce956fa3241a5b66d30be1de8f274": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "150fdaad29084b36b4c0bbb8bc332c0a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9b2ccaebd23340ec9b1c3024b0e6f511": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c7813f4a267048328b968ad5d4d186a6",
              "IPY_MODEL_9bc0fbe610a142a383d9230287077f5d",
              "IPY_MODEL_f8b4b3fd71ea46d5938170b07139695c"
            ],
            "layout": "IPY_MODEL_d5c113554226482fbfc86be641e5f6b0"
          }
        },
        "c7813f4a267048328b968ad5d4d186a6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8986df88a6a547d6a5534c727a7af3b6",
            "placeholder": "​",
            "style": "IPY_MODEL_e32e33d0a2c944589a0bbe3c430c7073",
            "value": "Downloading data: 100%"
          }
        },
        "9bc0fbe610a142a383d9230287077f5d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f93626e6584c4e1592acb6c4a339c124",
            "max": 880225504,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_7c87be58c32a4c4bbd004bc80073d3a6",
            "value": 880225504
          }
        },
        "f8b4b3fd71ea46d5938170b07139695c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7cf7f378f7ce47269d77d57a6282b27e",
            "placeholder": "​",
            "style": "IPY_MODEL_70d42f06a4364fbba2fcfc9c0676bc27",
            "value": " 880M/880M [00:26&lt;00:00, 34.3MB/s]"
          }
        },
        "d5c113554226482fbfc86be641e5f6b0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8986df88a6a547d6a5534c727a7af3b6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e32e33d0a2c944589a0bbe3c430c7073": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f93626e6584c4e1592acb6c4a339c124": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7c87be58c32a4c4bbd004bc80073d3a6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "7cf7f378f7ce47269d77d57a6282b27e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "70d42f06a4364fbba2fcfc9c0676bc27": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fdaf03881d764c8f8b028788f375736e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9d791ecca6f04dde95057cea781cae06",
              "IPY_MODEL_90f4850dab6e4e7d8b8b2e0af1134583",
              "IPY_MODEL_8d6f1c239e2a4ded98772b96dda25998"
            ],
            "layout": "IPY_MODEL_e46befd1c41544f0b7a9d5bd713f77c6"
          }
        },
        "9d791ecca6f04dde95057cea781cae06": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2e5d1634e63045bd920ca80be70fef59",
            "placeholder": "​",
            "style": "IPY_MODEL_caf5ddc67d294393badb7504de339da9",
            "value": "Generating train split: 100%"
          }
        },
        "90f4850dab6e4e7d8b8b2e0af1134583": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_977de4350bf64a33a8a06256932d4180",
            "max": 119924,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_7e35bd9ab19d47ceb977832a1565ee2d",
            "value": 119924
          }
        },
        "8d6f1c239e2a4ded98772b96dda25998": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0860221939524eac9b8ed1764893fff2",
            "placeholder": "​",
            "style": "IPY_MODEL_e0df0c3d092a45959134930ba73b2043",
            "value": " 119924/119924 [00:25&lt;00:00, 4690.11 examples/s]"
          }
        },
        "e46befd1c41544f0b7a9d5bd713f77c6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2e5d1634e63045bd920ca80be70fef59": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "caf5ddc67d294393badb7504de339da9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "977de4350bf64a33a8a06256932d4180": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7e35bd9ab19d47ceb977832a1565ee2d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "0860221939524eac9b8ed1764893fff2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e0df0c3d092a45959134930ba73b2043": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9d784138f4824502b5655747566531e1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3beba626a74444c39da002ad3a54c752",
              "IPY_MODEL_534cbaac696142fc9732314fdfeb3340",
              "IPY_MODEL_c287534756c9429ab3f2501771a3a50c"
            ],
            "layout": "IPY_MODEL_9e0c2c48fbef451da11712f7eef10288"
          }
        },
        "3beba626a74444c39da002ad3a54c752": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_97abeeff4a7e4fea846f4497d8286986",
            "placeholder": "​",
            "style": "IPY_MODEL_ab099bef23b94a71b5263fa5c18677fc",
            "value": "Generating validation split: 100%"
          }
        },
        "534cbaac696142fc9732314fdfeb3340": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b22f26cea34e4bae8cb330fdda989790",
            "max": 6633,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_3e4b9264858f432aa400502bfc96e710",
            "value": 6633
          }
        },
        "c287534756c9429ab3f2501771a3a50c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ad704ec477814b5c9fa61c06d69a4eab",
            "placeholder": "​",
            "style": "IPY_MODEL_3b8231f1efcc4966b1a9fe7ec1a34d83",
            "value": " 6633/6633 [00:01&lt;00:00, 4421.39 examples/s]"
          }
        },
        "9e0c2c48fbef451da11712f7eef10288": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "97abeeff4a7e4fea846f4497d8286986": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ab099bef23b94a71b5263fa5c18677fc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b22f26cea34e4bae8cb330fdda989790": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3e4b9264858f432aa400502bfc96e710": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ad704ec477814b5c9fa61c06d69a4eab": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3b8231f1efcc4966b1a9fe7ec1a34d83": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "eac2fadbfed34bd3bb1120ddcc47c23c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_76982b6250d04d0e9c5a2aac26b7c600",
              "IPY_MODEL_0ac7307c4db845c699d4fe9429c69fff",
              "IPY_MODEL_296fa08276994439ad9cc52c51482f9b"
            ],
            "layout": "IPY_MODEL_b1413ad0045649e6badc92aa244bf65f"
          }
        },
        "76982b6250d04d0e9c5a2aac26b7c600": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0d8fd7f78c564c958274568d95fd3548",
            "placeholder": "​",
            "style": "IPY_MODEL_2ae1ebd8770241088b3d6c34d83bac98",
            "value": "Generating test split: 100%"
          }
        },
        "0ac7307c4db845c699d4fe9429c69fff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0563d336b0784dc093cdbca6ab507e7b",
            "max": 6658,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2fc8c558d42e49148a5197fe0d29424b",
            "value": 6658
          }
        },
        "296fa08276994439ad9cc52c51482f9b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0577b9be3f1040ec837fda28499fb4c2",
            "placeholder": "​",
            "style": "IPY_MODEL_616ece7a8c4149f3ba13bf2ba9c1af1c",
            "value": " 6658/6658 [00:01&lt;00:00, 4619.69 examples/s]"
          }
        },
        "b1413ad0045649e6badc92aa244bf65f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0d8fd7f78c564c958274568d95fd3548": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2ae1ebd8770241088b3d6c34d83bac98": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "0563d336b0784dc093cdbca6ab507e7b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2fc8c558d42e49148a5197fe0d29424b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "0577b9be3f1040ec837fda28499fb4c2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "616ece7a8c4149f3ba13bf2ba9c1af1c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "abTBSx9WTstf"
      },
      "outputs": [],
      "source": [
        "#Learned from https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb#scrollTo=j9TNdHlQ0CLz"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Install Libraries***"
      ],
      "metadata": {
        "id": "dTTHKfyBT9xw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install transformers -q\n",
        "!pip install wandb -q\n",
        "!pip install datasets\n",
        "!pip install peft\n",
        "!pip install peft accelerate loralib --upgrade --quiet\n",
        "!pip install torchsummary\n",
        "!pip install rouge-score\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "siMmjZzyUBb7",
        "outputId": "13631e2c-16b4-48c6-a017-6cf7a3b4cbc7"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.3/207.3 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m266.1/266.1 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting datasets\n",
            "  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.3)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n",
            "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n",
            "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n",
            "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
            "  Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.0.3)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
            "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n",
            "Collecting xxhash (from datasets)\n",
            "  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting multiprocess (from datasets)\n",
            "  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n",
            "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.0)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->datasets) (4.10.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n",
            "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
            "Installing collected packages: xxhash, dill, multiprocess, datasets\n",
            "Successfully installed datasets-2.18.0 dill-0.3.8 multiprocess-0.70.16 xxhash-3.4.1\n",
            "Collecting peft\n",
            "  Downloading peft-0.10.0-py3-none-any.whl (199 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.1/199.1 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft) (1.25.2)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft) (24.0)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft) (6.0.1)\n",
            "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.2.1+cu121)\n",
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft) (4.38.2)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft) (4.66.2)\n",
            "Collecting accelerate>=0.21.0 (from peft)\n",
            "  Downloading accelerate-0.29.1-py3-none-any.whl (297 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.3/297.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft) (0.4.2)\n",
            "Requirement already satisfied: huggingface-hub>=0.17.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.20.3)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (3.13.3)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2023.6.0)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2.31.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (4.10.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.3)\n",
            "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m73.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m68.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m102.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.13.0->peft)\n",
            "  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.2.0)\n",
            "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13.0->peft)\n",
            "  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2023.12.25)\n",
            "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.15.2)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.5)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2024.2.2)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n",
            "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, accelerate, peft\n",
            "Successfully installed accelerate-0.29.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 peft-0.10.0\n",
            "Requirement already satisfied: torchsummary in /usr/local/lib/python3.10/dist-packages (1.5.1)\n",
            "Collecting rouge-score\n",
            "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.4.0)\n",
            "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score) (3.8.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.25.2)\n",
            "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.16.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (8.1.7)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (1.3.2)\n",
            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (2023.12.25)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (4.66.2)\n",
            "Building wheels for collected packages: rouge-score\n",
            "  Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=11cb4942e5ab4c262714a4c51613d0cc45ee04f2cb9873f5103c2026818c6e1b\n",
            "  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
            "Successfully built rouge-score\n",
            "Installing collected packages: rouge-score\n",
            "Successfully installed rouge-score-0.1.2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Import Libraries***"
      ],
      "metadata": {
        "id": "djwj7IJNUGus"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Importing stock libraries\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import torch\n",
        "import torch.nn.functional as F\n",
        "import torch.nn as nn\n",
        "from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler\n",
        "\n",
        "# Importing the T5 modules from huggingface/transformers\n",
        "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
        "from datasets import load_dataset\n",
        "\n",
        "# WandB – Import the wandb library\n",
        "import wandb\n",
        "\n",
        "# import PEFT and LoRA Stuffs\n",
        "from peft import LoraConfig, get_peft_model, TaskType\n",
        "\n",
        "from torchsummary import summary\n",
        "\n",
        "from rouge_score import rouge_scorer\n",
        "\n",
        "# Datetime for adding timestamps to training outputs\n",
        "from datetime import datetime\n",
        "import pytz\n",
        "\n",
        "# General use like saving models\n",
        "import os\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "M2nIos7NUJ5z"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Import Dataset***"
      ],
      "metadata": {
        "id": "6FwJYs_0UTWp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# load in the pubmed version of the dataset\n",
        "dataset = load_dataset(\"scientific_papers\", 'pubmed')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 431,
          "referenced_widgets": [
            "f7bc8db3de294fe2ad41a7f94322925b",
            "9ecbba7b92884960a1021f8d2c707b3c",
            "9b948078a87c4850a5520d5aaf7485d6",
            "3c10a4ec4d0a4408b892172f9d9c07d7",
            "7d5680b27a6c4f23bdec661851b3bb08",
            "79f075494ff14f0ba292b5d95ac88393",
            "1ce3a00bf77146b2a45b013b3bdfa8aa",
            "cedcb09b9ee8429b99fe2d8725965ae7",
            "2831f41d9bda480f9c671b56ce24329a",
            "265a75becb30411d9d3034c13627710e",
            "9ecb1f1f0dbf4a7e99822dd69f6cfe4f",
            "2cf18a3d5d074d659676ad3fb93d9dba",
            "4f49a2e2ba294c7897c2ff3207e14f50",
            "2e377e0bdbe740448670e0ea2c5f6d36",
            "a0a2c4aa7d2a41f1a3042749a37f6150",
            "d74459ab45c14a338a36cb1ac3c62249",
            "add98f7d7cf74ef8a746bb9ce0e17695",
            "274a4f7529174c358aa90e27547671bc",
            "936c4bafb94842db80722e3f4bea4ca2",
            "274a8fafe1a8411a888edd65e5574014",
            "c3fc4b71b6514b0c9f40ed389bab512d",
            "7d41c294ca314b19b493fd2c5e935b48",
            "6ab45d023ab545be9a8a8fe2283d8f00",
            "49c9b880f13242d592092e5bd8cc1acb",
            "9c91c38f13cf4c4d8db35f12eff9ed1a",
            "fdd89353c9d040e3838e773a6bec7a79",
            "a6b08da8e69d466089418c3ef8aeb97f",
            "fa39adfc238143859d537ccd7848d99e",
            "4d03cf5626ae45d3b1f6505a16a1cc4c",
            "8fcf28a815e2454e8d400a00f9518d84",
            "1b86816ba0de4c99b1549c6f89203939",
            "906ce956fa3241a5b66d30be1de8f274",
            "150fdaad29084b36b4c0bbb8bc332c0a",
            "9b2ccaebd23340ec9b1c3024b0e6f511",
            "c7813f4a267048328b968ad5d4d186a6",
            "9bc0fbe610a142a383d9230287077f5d",
            "f8b4b3fd71ea46d5938170b07139695c",
            "d5c113554226482fbfc86be641e5f6b0",
            "8986df88a6a547d6a5534c727a7af3b6",
            "e32e33d0a2c944589a0bbe3c430c7073",
            "f93626e6584c4e1592acb6c4a339c124",
            "7c87be58c32a4c4bbd004bc80073d3a6",
            "7cf7f378f7ce47269d77d57a6282b27e",
            "70d42f06a4364fbba2fcfc9c0676bc27",
            "fdaf03881d764c8f8b028788f375736e",
            "9d791ecca6f04dde95057cea781cae06",
            "90f4850dab6e4e7d8b8b2e0af1134583",
            "8d6f1c239e2a4ded98772b96dda25998",
            "e46befd1c41544f0b7a9d5bd713f77c6",
            "2e5d1634e63045bd920ca80be70fef59",
            "caf5ddc67d294393badb7504de339da9",
            "977de4350bf64a33a8a06256932d4180",
            "7e35bd9ab19d47ceb977832a1565ee2d",
            "0860221939524eac9b8ed1764893fff2",
            "e0df0c3d092a45959134930ba73b2043",
            "9d784138f4824502b5655747566531e1",
            "3beba626a74444c39da002ad3a54c752",
            "534cbaac696142fc9732314fdfeb3340",
            "c287534756c9429ab3f2501771a3a50c",
            "9e0c2c48fbef451da11712f7eef10288",
            "97abeeff4a7e4fea846f4497d8286986",
            "ab099bef23b94a71b5263fa5c18677fc",
            "b22f26cea34e4bae8cb330fdda989790",
            "3e4b9264858f432aa400502bfc96e710",
            "ad704ec477814b5c9fa61c06d69a4eab",
            "3b8231f1efcc4966b1a9fe7ec1a34d83",
            "eac2fadbfed34bd3bb1120ddcc47c23c",
            "76982b6250d04d0e9c5a2aac26b7c600",
            "0ac7307c4db845c699d4fe9429c69fff",
            "296fa08276994439ad9cc52c51482f9b",
            "b1413ad0045649e6badc92aa244bf65f",
            "0d8fd7f78c564c958274568d95fd3548",
            "2ae1ebd8770241088b3d6c34d83bac98",
            "0563d336b0784dc093cdbca6ab507e7b",
            "2fc8c558d42e49148a5197fe0d29424b",
            "0577b9be3f1040ec837fda28499fb4c2",
            "616ece7a8c4149f3ba13bf2ba9c1af1c"
          ]
        },
        "id": "SVMHiJkdURt8",
        "outputId": "16db095a-0dd9-44c3-faab-60bc2b5d2368"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n",
            "/usr/local/lib/python3.10/dist-packages/datasets/load.py:1461: FutureWarning: The repository for scientific_papers contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/scientific_papers\n",
            "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
            "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading builder script:   0%|          | 0.00/5.35k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f7bc8db3de294fe2ad41a7f94322925b"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading readme:   0%|          | 0.00/8.27k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "2cf18a3d5d074d659676ad3fb93d9dba"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading data:   0%|          | 0.00/3.62G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "6ab45d023ab545be9a8a8fe2283d8f00"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading data:   0%|          | 0.00/880M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "9b2ccaebd23340ec9b1c3024b0e6f511"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating train split:   0%|          | 0/119924 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "fdaf03881d764c8f8b028788f375736e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating validation split:   0%|          | 0/6633 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "9d784138f4824502b5655747566531e1"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating test split:   0%|          | 0/6658 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "eac2fadbfed34bd3bb1120ddcc47c23c"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Check GPU***\n"
      ],
      "metadata": {
        "id": "P-sPJmvGU4dz"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WKnNNZdDU8RG",
        "outputId": "1926704e-fb17-49f6-c7db-74c20befd3c1"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Sat Apr  6 21:17:59 2024       \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n",
            "|-----------------------------------------+----------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                      |               MIG M. |\n",
            "|=========================================+======================+======================|\n",
            "|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   35C    P0              26W / 300W |      0MiB / 16384MiB |      0%      Default |\n",
            "|                                         |                      |                  N/A |\n",
            "+-----------------------------------------+----------------------+----------------------+\n",
            "                                                                                         \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                            |\n",
            "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
            "|        ID   ID                                                             Usage      |\n",
            "|=======================================================================================|\n",
            "|  No running processes found                                                           |\n",
            "+---------------------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Use GPU***"
      ],
      "metadata": {
        "id": "8K47hh8IVAoW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# # Setting up the device for GPU usage\n",
        "from torch import cuda\n",
        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
        "print (\"Using: \", device)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rYMeoZoRVDw8",
        "outputId": "ca074dfc-47d6-4265-fd40-121362866f17"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Using:  cuda\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Use wandb***"
      ],
      "metadata": {
        "id": "KGscKLi1VQ83"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#!wandb login\n"
      ],
      "metadata": {
        "id": "AWS8TgvlVU8E"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Function for saving pretrained model and tokenizer"
      ],
      "metadata": {
        "id": "FBcBZl8VELD3"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def save_to_hf(model, tokenizer, model_name='saved_model'):\n",
        "\n",
        "  # create folder to save model and tokenizer to\n",
        "  cwd = os.getcwd()\n",
        "  model_save_path = os.path.join(cwd, model_name)\n",
        "  os.makedirs(model_save_path, exist_ok=False)\n",
        "\n",
        "  # save model to local instance of colab\n",
        "  model.save_pretrained(model_save_path)\n",
        "\n",
        "  # save tokenizer to local instance of colab\n",
        "  tokenizer.save_pretrained(model_save_path)\n",
        "\n",
        "  # login to hugging face to upload\n",
        "  !huggingface-cli login\n",
        "\n",
        "  # upload model to hugging face\n",
        "  !huggingface-cli upload {model_name}"
      ],
      "metadata": {
        "id": "s3A3hWmkEThE"
      },
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Custom Dataset Class***"
      ],
      "metadata": {
        "id": "5bTE_OwUVb3S"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Creating a custom dataset for reading the dataframe and loading it into the dataloader to pass it to the neural network at a later stage for finetuning the model and to prepare it for predictions\n",
        "\n",
        "class CustomDataset(Dataset):\n",
        "\n",
        "    def __init__(self, dataframe, tokenizer, source_len, summ_len):\n",
        "        self.tokenizer = tokenizer\n",
        "        self.data = dataframe\n",
        "        self.source_len = source_len\n",
        "        self.summ_len = summ_len\n",
        "        self.text = self.data.abstract\n",
        "        self.ctext = self.data.article\n",
        "\n",
        "    def __len__(self):\n",
        "        return len(self.text)\n",
        "\n",
        "    def __getitem__(self, index):\n",
        "        ctext = str(self.ctext[index])\n",
        "        ctext = ' '.join(ctext.split())\n",
        "\n",
        "        text = str(self.text[index])\n",
        "        text = ' '.join(text.split())\n",
        "\n",
        "        source = self.tokenizer.batch_encode_plus([ctext], max_length= self.source_len, pad_to_max_length=True,return_tensors='pt')\n",
        "        target = self.tokenizer.batch_encode_plus([text], max_length= self.summ_len, pad_to_max_length=True,return_tensors='pt')\n",
        "\n",
        "        source_ids = source['input_ids'].squeeze()\n",
        "        source_mask = source['attention_mask'].squeeze()\n",
        "        target_ids = target['input_ids'].squeeze()\n",
        "        target_mask = target['attention_mask'].squeeze()\n",
        "\n",
        "        return {\n",
        "            'source_ids': source_ids.to(dtype=torch.long),\n",
        "            'source_mask': source_mask.to(dtype=torch.long),\n",
        "            'target_ids': target_ids.to(dtype=torch.long),\n",
        "            'target_ids_y': target_ids.to(dtype=torch.long)\n",
        "        }"
      ],
      "metadata": {
        "id": "fQYOv6djVe8L"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Training Function***"
      ],
      "metadata": {
        "id": "e8Ihs9VXV8b3"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def train(epoch, tokenizer, model, device, loader, optimizer):\n",
        "    model.train()\n",
        "    for _,data in enumerate(loader, 0):\n",
        "        y = data['target_ids'].to(device, dtype = torch.long)\n",
        "        y_ids = y[:, :-1].contiguous()\n",
        "        labels = y[:, 1:].clone().detach()\n",
        "        labels[y[:, 1:] == tokenizer.pad_token_id] = -100\n",
        "        ids = data['source_ids'].to(device, dtype = torch.long)\n",
        "        mask = data['source_mask'].to(device, dtype = torch.long)\n",
        "\n",
        "        outputs = model(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids, labels=labels)\n",
        "        loss = outputs[0]\n",
        "\n",
        "        if _%100==0:\n",
        "          print (\"RUN: \" + str(_))\n",
        "          timestamp = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d %H:%M:%S %Z%z')\n",
        "          print(f\"\\t[{timestamp}]\")\n",
        "          #print(f'Epoch: {epoch}, Loss:  {loss.item()}')\n",
        "          print(f'Epoch: {epoch}, Batch Size: {ids.size(0)}, Loss:  {loss.item()}')\n",
        "\n",
        "        optimizer.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer.step()"
      ],
      "metadata": {
        "id": "-CDYww6VWBHr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Additional Layers Class***"
      ],
      "metadata": {
        "id": "_8WXla66DJhV"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch.nn.functional as F\n",
        "\n",
        "class CustomLinearLayer(nn.Module):\n",
        "    def __init__(self, input_size, output_size, activation=None):\n",
        "        super(CustomLinearLayer, self).__init__()\n",
        "        self.linear = nn.Linear(input_size, output_size)\n",
        "        self.activation = activation\n",
        "\n",
        "    def forward(self, x):\n",
        "        x = self.linear(x)\n",
        "        if self.activation is not None:\n",
        "            x = self.activation(x)\n",
        "        return x\n"
      ],
      "metadata": {
        "id": "ltwynJR6DOJr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Turned Training Function from Previous Cell into a class***"
      ],
      "metadata": {
        "id": "cc0G1jsC1zdf"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "class T5AbstractsTrainer:\n",
        "    def __init__(self, tokenizer, model, device, optimizer):\n",
        "        self.tokenizer = tokenizer\n",
        "        self.model = model\n",
        "        self.device = device\n",
        "        self.optimizer = optimizer\n",
        "\n",
        "\n",
        "        #attempt to add custom layer --- i think it works? or nor? SUS\n",
        "        self.linear_layer = nn.Linear(32128, 500)\n",
        "        #self.linear_layer = nn.Linear(15, 500)\n",
        "        self.activation = nn.Tanh()\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "    def forward(self, ids, mask, y_ids, labels):\n",
        "      outputs = self.model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "      loss, logits = outputs.loss, outputs.logits\n",
        "\n",
        "      # Apply custom linear layer and activation\n",
        "      logits = logits.view(-1, logits.size(-1))  # Reshape logits\n",
        "      linear_output = self.linear_layer(logits)\n",
        "      tanh_output = self.activation(linear_output).squeeze(-1)\n",
        "\n",
        "      # return loss, tanh_output\n",
        "      return loss, tanh_output\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "    def train(self, epoch, loader):\n",
        "        self.model.train()\n",
        "        self.linear_layer.to(self.device)\n",
        "        self.activation.to(self.device)\n",
        "\n",
        "\n",
        "        # Print model layers\n",
        "        #print(\"Model Layers:\")\n",
        "        #for name, layer in self.model.named_children():\n",
        "          #print(name, layer)\n",
        "\n",
        "\n",
        "        for _, data in enumerate(loader, 0):\n",
        "            y = data['target_ids'].to(self.device, dtype=torch.long)\n",
        "            y_ids = y[:, :-1].contiguous()\n",
        "            labels = y[:, 1:].clone().detach()\n",
        "            labels[y[:, 1:] == self.tokenizer.pad_token_id] = -100\n",
        "            ids = data['source_ids'].to(self.device, dtype=torch.long)\n",
        "            mask = data['source_mask'].to(self.device, dtype=torch.long)\n",
        "\n",
        "            outputs = self.model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "            loss = outputs[0]\n",
        "\n",
        "\n",
        "\n",
        "            if _ % 100 == 0:\n",
        "                print(\"RUN:\", _)\n",
        "                timestamp = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d %H:%M:%S %Z%z')\n",
        "                print(f\"\\t[{timestamp}]\")\n",
        "                print(f'Epoch: {epoch}, Batch Size: {ids.size(0)}, Loss: {loss.item()}')\n",
        "\n",
        "            logits = outputs.logits.to(self.device)\n",
        "            logits = logits.view(-1, logits.size(-1))  # Reshape logits to (batch_size * sequence_length, vocab_size) -- for linear layer\n",
        "\n",
        "\n",
        "            #print(\"Device of inputs (ids, mask, y_ids, labels):\", ids.device, mask.device, y_ids.device, labels.device)\n",
        "            #print(\"Device of outputs.logits:\", logits.device)\n",
        "            #print(\"Device of linear layer weight:\", self.linear_layer.weight.device)\n",
        "            #print(\"Device of linear layer bias:\", self.linear_layer.bias.device)\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "            #print(\"Output shape before linear layer and Tanh activation:\", logits.shape)\n",
        "            linear_output = self.linear_layer(logits).to(self.device)\n",
        "            tanh_output = self.activation(linear_output).to(self.device).squeeze(-1)\n",
        "\n",
        "            #print(\"tanh_output shape after linear layer and Tanh activation:\", tanh_output.shape)\n",
        "\n",
        "            #print (\"AFTER LINEAR LAYER\", outputs.logits.shape)\n",
        "\n",
        "            self.optimizer.zero_grad()\n",
        "            loss.backward()\n",
        "            self.optimizer.step()\n"
      ],
      "metadata": {
        "id": "emse28aw141n"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Create Training Data Arrays***"
      ],
      "metadata": {
        "id": "xq_T6gmSWjsP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#Set the 2 arrays\n",
        "#Prep arrays\n",
        "import pandas as pd\n",
        "\n",
        "train_set_articles = [data['article'] for data in dataset['train']]\n",
        "train_set_abstracts = [data['abstract'] for data in dataset['train']]\n",
        "val_set_articles = [data['article'] for data in dataset['validation']]\n",
        "val_set_abstracts = [data['abstract'] for data in dataset['validation']]\n",
        "test_set_articles = [data['article'] for data in dataset['test']]\n",
        "test_set_abstracts = [data['abstract'] for data in dataset['test']]\n",
        "\n",
        "# Create DataFrame for training data\n",
        "df_train = pd.DataFrame({'article': train_set_articles, 'abstract': train_set_abstracts})\n",
        "\n",
        "# Add prefix to the article column in the training DataFrame\n",
        "df_train['article'] = 'summarize: ' + df_train['article']\n",
        "\n",
        "# Create DataFrame for validation data\n",
        "df_val = pd.DataFrame({'article': val_set_articles, 'abstract': val_set_abstracts})\n",
        "\n",
        "# Add prefix to the article column in the validation DataFrame\n",
        "df_val['article'] = 'summarize: ' + df_val['article']\n",
        "\n",
        "# Create DataFrame for test data\n",
        "df_test = pd.DataFrame({'article': test_set_articles, 'abstract': test_set_abstracts})\n",
        "\n",
        "# Add prefix to the article column in the test DataFrame\n",
        "df_test['article'] = 'summarize: ' + df_test['article']\n"
      ],
      "metadata": {
        "id": "MZpQ_zjRWqDO"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***MAIN Code to train and then predict***"
      ],
      "metadata": {
        "id": "FzrLZNUkW7vQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n",
        "\n",
        "# Set random seeds and deterministic pytorch for reproducibility\n",
        "torch.manual_seed(42) # pytorch random seed\n",
        "np.random.seed(42) # numpy random seed\n",
        "torch.backends.cudnn.deterministic = True\n",
        "# tokenzier for encoding the text\n",
        "tokenizer = T5Tokenizer.from_pretrained(\"t5-small\")\n",
        "#print (train_set_articles[0])\n",
        "#print (\"hello\")\n",
        "\n",
        "# Creating the Training and Validation dataset for further creation of Dataloader\n",
        "training_set = CustomDataset(df_train, tokenizer, 512, 50)\n",
        "val_set = CustomDataset(df_val, tokenizer, 512, 50)\n",
        "\n",
        "# Defining the parameters for creation of dataloaders\n",
        "train_params = {\n",
        "    'batch_size': 8,\n",
        "    'shuffle': True,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "val_params = {\n",
        "    'batch_size': 8,\n",
        "    'shuffle': False,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "\n",
        "# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.\n",
        "training_loader = DataLoader(training_set, **train_params)\n",
        "val_loader = DataLoader(val_set, **val_params)\n",
        "\n",
        "\n",
        "# Defining the model. We are using t5-base model and added a Language model layer on top for generation of Summary.\n",
        "# Further this model is sent to device (GPU/TPU) for using the hardware.\n",
        "model = T5ForConditionalGeneration.from_pretrained(\"t5-small\")\n",
        "model = model.to(device)\n",
        "\n",
        "# Defining the optimizer that will be used to tune the weights of the network in the training session.\n",
        "optimizer = torch.optim.Adam(params =  model.parameters(), lr=1e-5) # 1e-4 ended with loss Loss:  2.074270486831665, now trying e-5 -- got 2.3540332317352295 ---> so far best lr seems to be 1e-4\n",
        "\n",
        "EPOCHS = 1\n",
        "\n",
        "for epoch in range(EPOCHS):\n",
        "        train(epoch, tokenizer, model, device, training_loader, optimizer)\n",
        "\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "AVvYu1esW_dL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# ***Playground code***"
      ],
      "metadata": {
        "id": "GsvA9avuD3nJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "keys = dataset['train'][0].keys()\n",
        "print(keys)\n",
        "\n",
        "print (dataset['train'][0])\n",
        "# Now call the print_model function with your model instance\n",
        "#print_model(model)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1QGs63phD122",
        "outputId": "6cc087ec-30ee-42ee-c8a4-c45e720d42c1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "dict_keys(['article', 'abstract', 'section_names'])\n",
            "{'article': \"a recent systematic analysis showed that in 2011 , 314 ( 296 - 331 ) million children younger than 5 years were mildly , moderately or severely stunted and 258 ( 240 - 274 ) million were mildly , moderately or severely underweight in the developing countries .\\nin iran a study among 752 high school girls in sistan and baluchestan showed prevalence of 16.2% , 8.6% and 1.5% , for underweight , overweight and obesity , respectively .\\nthe prevalence of malnutrition among elementary school aged children in tehran varied from 6% to 16% .\\nanthropometric study of elementary school students in shiraz revealed that 16% of them suffer from malnutrition and low body weight .\\nsnack should have 300 - 400 kcal energy and could provide 5 - 10 g of protein / day . nowadays , school nutrition programs are running as the national programs , world - wide . national school lunch program in the united states\\nthere are also some reports regarding school feeding programs in developing countries . in vietnam ,\\nschool base program showed an improvement in nutrient intakes . in iran a national free food program ( nffp )\\nis implemented in elementary schools of deprived areas to cover all poor students . however , this program is not conducted in slums and poor areas of the big cities so many malnourished children with low socio - economic situation are not covered by nffp . although the rate of poverty in areas known as deprived is higher than other areas , many students in deprived areas are not actually poor and can afford food .\\nhence , nutritional value of the nffp is lower than the scientific recommended snacks for this age group .\\nfurthermore , lack of variety of food packages has decreased the tendency of children toward nffp . on the other hand ,\\nthe most important one is ministry of education ( moe ) of iran , which is responsible for selecting and providing the packages for targeted schools .\\nthe ministry of health ( moh ) is supervising the health situation of students and their health needs .\\nwelfare organizations , along with charities , have the indirect effect on nutritional status of students by financial support of their family .\\nprovincial governors have also the role of coordinating and supervising all activities of these organizations .\\nparent - teacher association is a community - based institution that participates in school 's policy such as nffp .\\nin addition to these organizations , nutritional literacy of students , their parents and teachers , is a very important issue , which could affect nutritional status of school age children .\\ntherefore , the present study was conducted with the aim of improving the nffp , so that by its resources all poor children will be covered even in big cities .\\nmoreover , all food packages were replaced by nutritious and diverse packages that were accessible for non - poor children . according to the aim of this study and multiple factors that could affect the problem ,\\npublic health advocacy has been chosen as the best strategy to deal with this issue .\\ntherefore , the present study determines the effects of nutrition intervention in an advocacy process model on the prevalence of underweight in school aged children in the poor area of shiraz , iran .\\nthis interventional study has been carried out between 2009 and 2010 in shiraz , iran .\\nthis survey was approved by the research committee of shiraz university of medical sciences . in coordination with education organization of fars province\\ntwo elementary schools and one middle school in the third region of the urban area of shiraz were selected randomly . in those schools all\\nstudents ( 2897 , 7 - 13 years old ) were screened based on their body mass index ( bmi ) by nutritionists . according to convenience method all\\nstudents divided to two groups based on their economic situation ; family revenue and head of household 's job and nutrition situation ; the first group were poor and malnourished students and the other group were well nourished or well - off students .\\nfor this report , the children 's height and weight were entered into center for disease control and prevention ( cdc ) to calculate bmi and bmi - for - age z - scores based on cdc for diseases control and prevention and growth standards .\\nthe significance of the difference between proportions was calculated using two - tailed z - tests for independent proportions . for implementing the interventions ,\\nthe advocacy process model weight was to the nearest 0.1 kg on a balance scale ( model # seca scale ) .\\nstanding height was measured to the nearest 0.1 cm with a wall - mounted stadiometer .\\nadvocacy group formation : this step was started with stakeholder analysis and identifying the stakeholders .\\nthe team was formed with representatives of all stakeholders include ; education organization , welfare organization , deputy for health of shiraz university , food and cosmetic product supervisory office and several non - governmental organizations and charities . situation analysis : this was carried out by use of existing data such as formal report of organizations , literature review and focus group with experts .\\nthe prevalence of malnutrition and its related factors among students was determined and weaknesses and strengths of the nffp were analyzed .\\naccordingly , three sub - groups were established : research and evaluation , education and justification and executive group . designing the strategies :\\nthree strategies were identified ; education and justification campaign , nutritional intervention ( providing nutritious , safe and diverse snacks ) and networking . performing the interventions : interventions that were implementing in selected schools were providing a diverse and nutritious snack package along with nutrition education for both groups while the first group ( poor and malnourished students ) was utilized the package free of charge .\\neducation and justification intervention : regarding the literature review and expert opinion , an educational group affiliated with the advocacy team has prepared educational booklets about nutritional information for each level ( degree ) .\\naccordingly , education of these booklets has been integrated into regular education of students and they educated and justified for better nutrition life - style .\\nit leads the educational group to hold several meeting with the student 's parents to justify them about the project and its benefit for their children .\\nafter these meetings , parental desire for participation in the project illustrated the effectiveness of the justification meeting with them .\\nfor educate fifteen talk show programs in tv and radio , 12 published papers in the local newspaper , have implemented to mobilize the community and gain their support .\\nhealthy diet , the importance of breakfast and snack in adolescence , wrong food habits among school age children , role of the family to improve food habit of children were the main topics , in which media campaign has focused on .\\nnutritional intervention : the snack basket of the students was replaced with traditional , nutritious and diverse foods . in general , the new snack package in average has provided 380 kcal energy , 15 g protein along with sufficient calcium and iron .\\nlow economic and malnourished children were supported by executive group affiliated with advocacy team and the rest of them prepare their snack by themselves .\\nresearch and evaluation : in this step , the literacy and anthropometric indices ( bmi ) of students were assessed before and after the interventions .\\nthe reference for anthropometric measures was the world health organization / national center for health statistics ( who / nchs ) standards and the cut - offs were - two standard deviations ( sd ) from the mean .\\neach student that was malnourished and poor has been taken into account for free food and nutritious snacks .\\ndemographic information , height , weight and knowledge of the students were measured by use of a validated and reliable ( cronbach 's alpha was 0.61 ) questionnaire .\\nthis project is granted by shiraz university of medical sciences , charities and welfare organization and education organization of fars province .\\nstatistical analyses were performed using the statistical package for the social sciences ( spss ) software , version 17.0 ( spss inc . ,\\nthe results are expressed as mean  sd and proportions as appropriated . in order to determine the effective variables on the malnutrition status\\npaired t test was used to compare the end values with baseline ones in each group .\\nin this project , the who z - score cut - offs used were as follow : using bmi - for - age z - scores ; overweight : > + 1 sd , i.e. , z - score > 1 ( equivalent to bmi 25 kg / m ) , obesity : > + 2 sd ( equivalent to bmi 30 kg / m ) , thinness : < 2 sd and severe thinness : < 3 sd .\\nthis interventional study has been carried out between 2009 and 2010 in shiraz , iran .\\nthis survey was approved by the research committee of shiraz university of medical sciences . in coordination with education organization of fars province\\ntwo elementary schools and one middle school in the third region of the urban area of shiraz were selected randomly . in those schools all\\nstudents ( 2897 , 7 - 13 years old ) were screened based on their body mass index ( bmi ) by nutritionists . according to convenience method all\\nstudents divided to two groups based on their economic situation ; family revenue and head of household 's job and nutrition situation ; the first group were poor and malnourished students and the other group were well nourished or well - off students .\\nfor this report , the children 's height and weight were entered into center for disease control and prevention ( cdc ) to calculate bmi and bmi - for - age z - scores based on cdc for diseases control and prevention and growth standards .\\nthe significance of the difference between proportions was calculated using two - tailed z - tests for independent proportions . for implementing the interventions ,\\nweight was to the nearest 0.1 kg on a balance scale ( model # seca scale ) .\\nstanding height was measured to the nearest 0.1 cm with a wall - mounted stadiometer .\\nadvocacy group formation : this step was started with stakeholder analysis and identifying the stakeholders .\\nthe team was formed with representatives of all stakeholders include ; education organization , welfare organization , deputy for health of shiraz university , food and cosmetic product supervisory office and several non - governmental organizations and charities . situation analysis : this was carried out by use of existing data such as formal report of organizations , literature review and focus group with experts .\\nthe prevalence of malnutrition and its related factors among students was determined and weaknesses and strengths of the nffp were analyzed .\\naccordingly , three sub - groups were established : research and evaluation , education and justification and executive group . designing the strategies :\\nthree strategies were identified ; education and justification campaign , nutritional intervention ( providing nutritious , safe and diverse snacks ) and networking . performing the interventions : interventions that were implementing in selected schools were providing a diverse and nutritious snack package along with nutrition education for both groups while the first group ( poor and malnourished students ) was utilized the package free of charge . duration of intervention was 6 months .\\neducation and justification intervention : regarding the literature review and expert opinion , an educational group affiliated with the advocacy team has prepared educational booklets about nutritional information for each level ( degree ) .\\naccordingly , education of these booklets has been integrated into regular education of students and they educated and justified for better nutrition life - style . obviously , student 's families had remarkable effect on children 's food habit .\\nit leads the educational group to hold several meeting with the student 's parents to justify them about the project and its benefit for their children .\\nafter these meetings , parental desire for participation in the project illustrated the effectiveness of the justification meeting with them .\\neducate fifteen talk show programs in tv and radio , 12 published papers in the local newspaper , have implemented to mobilize the community and gain their support .\\nhealthy diet , the importance of breakfast and snack in adolescence , wrong food habits among school age children , role of the family to improve food habit of children were the main topics , in which media campaign has focused on .\\nnutritional intervention : the snack basket of the students was replaced with traditional , nutritious and diverse foods . in general , the new snack package in average has provided 380 kcal energy , 15 g protein along with sufficient calcium and iron .\\nlow economic and malnourished children were supported by executive group affiliated with advocacy team and the rest of them prepare their snack by themselves .\\nresearch and evaluation : in this step , the literacy and anthropometric indices ( bmi ) of students were assessed before and after the interventions .\\nthe reference for anthropometric measures was the world health organization / national center for health statistics ( who / nchs ) standards and the cut - offs were - two standard deviations ( sd ) from the mean .\\neach student that was malnourished and poor has been taken into account for free food and nutritious snacks .\\ndemographic information , height , weight and knowledge of the students were measured by use of a validated and reliable ( cronbach 's alpha was 0.61 ) questionnaire .\\nthis project is granted by shiraz university of medical sciences , charities and welfare organization and education organization of fars province .\\nadvocacy group formation : this step was started with stakeholder analysis and identifying the stakeholders .\\nthe team was formed with representatives of all stakeholders include ; education organization , welfare organization , deputy for health of shiraz university , food and cosmetic product supervisory office and several non - governmental organizations and charities .\\nsituation analysis : this was carried out by use of existing data such as formal report of organizations , literature review and focus group with experts .\\nthe prevalence of malnutrition and its related factors among students was determined and weaknesses and strengths of the nffp were analyzed .\\naccordingly , three sub - groups were established : research and evaluation , education and justification and executive group .\\ndesigning the strategies : three strategies were identified ; education and justification campaign , nutritional intervention ( providing nutritious , safe and diverse snacks ) and networking .\\nperforming the interventions : interventions that were implementing in selected schools were providing a diverse and nutritious snack package along with nutrition education for both groups while the first group ( poor and malnourished students ) was utilized the package free of charge .\\neducation and justification intervention : regarding the literature review and expert opinion , an educational group affiliated with the advocacy team has prepared educational booklets about nutritional information for each level ( degree ) .\\naccordingly , education of these booklets has been integrated into regular education of students and they educated and justified for better nutrition life - style . obviously , student 's families had remarkable effect on children 's food habit .\\nit leads the educational group to hold several meeting with the student 's parents to justify them about the project and its benefit for their children .\\nafter these meetings , parental desire for participation in the project illustrated the effectiveness of the justification meeting with them .\\neducate fifteen talk show programs in tv and radio , 12 published papers in the local newspaper , have implemented to mobilize the community and gain their support .\\nhealthy diet , the importance of breakfast and snack in adolescence , wrong food habits among school age children , role of the family to improve food habit of children were the main topics , in which media campaign has focused on . nutritional intervention : the snack basket of the students\\nwas replaced with traditional , nutritious and diverse foods . in general , the new snack package in average has provided 380 kcal energy , 15 g protein along with sufficient calcium and iron .\\nlow economic and malnourished children were supported by executive group affiliated with advocacy team and the rest of them prepare their snack by themselves .\\nresearch and evaluation : in this step , the literacy and anthropometric indices ( bmi ) of students were assessed before and after the interventions .\\nthe reference for anthropometric measures was the world health organization / national center for health statistics ( who / nchs ) standards and the cut - offs were - two standard deviations ( sd ) from the mean .\\neach student that was malnourished and poor has been taken into account for free food and nutritious snacks .\\ndemographic information , height , weight and knowledge of the students were measured by use of a validated and reliable ( cronbach 's alpha was 0.61 ) questionnaire .\\nthis project is granted by shiraz university of medical sciences , charities and welfare organization and education organization of fars province .\\nstatistical analyses were performed using the statistical package for the social sciences ( spss ) software , version 17.0 ( spss inc . , chicago , il , usa ) .\\nthe results are expressed as mean  sd and proportions as appropriated . in order to determine the effective variables on the malnutrition status\\npaired t test was used to compare the end values with baseline ones in each group .\\ntwo - sided p < 0.05 was considered to be statistically significant . in this project ,\\nthe who z - score cut - offs used were as follow : using bmi - for - age z - scores ; overweight : > + 1 sd , i.e. , z - score > 1 ( equivalent to bmi 25 kg / m ) , obesity : > + 2 sd ( equivalent to bmi 30\\nkg / m ) , thinness : < 2 sd and severe thinness : < 3 sd .\\nstudy population contains 2897 children ; 70.8% were primary school students and 29.2% were secondary school students .\\n2336 ( 80.5% ) out of total students were well - off and 561 children ( 19.5% ) were indigent .\\n19.5% of subjects were in case group ( n = 561 ) and 80.5% were in the control group ( n = 2336 ) .\\nthe mean of age in welfare group was 10.0  2.3 and 10.5  2.5 in non - welfare group .\\ndemographic characteristics of school aged children in shiraz , iran table 2 shows the frequency of subjects in different categories of bmi for age in non - welfare and welfare groups of school aged children separately among boys and girls before and after a nutrition intervention based on advocacy process model in shiraz , iran .\\nthe frequency of subjects with bmi lower than < 2 sd decreased significantly after intervention among non - welfare girls ( p < 0.01 ) .\\nhowever , there were no significant decreases in the frequency of subjects with bmi lower than < 2 sd boys .\\nwhen we assess the effect of intervention in total population without separating by sex groups , we found no significant change in this population [ table 3 ] .\\nbmi for age for iranian students aged 7 - 14 years based on gender according to who growth standards 2007 bmi for age for iranian students aged 7 - 14 years according to who growth standards 2007 in non - welfare and welfare groups of total population table 4 has shown the prevalence of normal bmi , mild , moderate and severe malnutrition in non - welfare and welfare groups of school aged children separately among boys and girls before and after a nutrition intervention based on advocacy process model . according to this table\\nthere were no significant differences in the prevalence of mild , moderate and severe malnutrition among girls and boys .\\ntable 4 also shows the mean of all anthropometric indices changed significantly after intervention both among girls and boys .\\nthe pre- and post - test education assessment in both groups showed that the student 's average knowledge score has been significantly increased from 12.5  3.2 to 16.8  4.3 ( p < 0.0001 ) .\\nbmi , height and weight in non - welfare and welfare groups of school aged children separately in males and females before and after a nutrition intervention based on advocacy process model in shiraz , iran according to study 's finding the odds ratio ( or ) of sever thinness and thinness in non - welfare compared with welfare is 3.5 ( or = 3.5 , confidence interval [ ci ] = 2.5 - 3.9 , p < 0.001 ) .\\nfurthermore , the finding showed or of overweight and obesity in welfare compared to non - welfare is 19.3 ( or = 19.3 , ci = 2.5 - 3.9 , p = 0.04 ) .\\nthe result of this community intervention study revealed that nutrition intervention based on advocacy program had been successful to reduce the prevalence of underweight among poor girls .\\nthis study shows determinant factor of nutritional status of school age children was their socio - economic level . according to our knowledge ,\\nthis is the first study , which determines the effect of a community intervention based on advocacy process on the malnutrition indices in a big city ( shiraz ) in iran .\\nthe other program in iran ( nffp ) is specified to deprived area and is not conducted in big cities .\\nallocating millions of dollars to nffp by government , selecting the malnourished students through an active screening system at primary and middle schools , paying attention of policy makers to student 's nutrition have provided the opportunity to combat the problem . however , negligence of under - poverty line , providing poor snacks in terms of nutritional value and lack of variety are the main defects of this program .\\nadvocacy by definition is a blending of science , ethics and politics for comprehensive approaching health issues . by using advocacy program in california among the high school students for improving their nutrition and physical activity\\nangeles unified school district participants emphasized on nutrition classes for families as well as students in addition to other interventions . in the present study\\nanother study revealed that evaluability assessment gave stakeholders the opportunity to reflect on the project and its implementation issues .\\nit seems that in iran , free food program among the students not only is needed in deprived areas , but also it should be performed in big cities such as shiraz . at baseline ,\\nno significant difference was founded among wealthy students between the pre- and post - nutritional status intervention .\\nin contrast , the numbers of students who have malnutrition decreased from 44% to 39.4% , which was identified as a significant among impecunious girls students .\\nthere was also a significant increase in the proportion of children with bmi that was normal for age ( 2 to + 1 sd ) most of the published community interventions showed better results among females compared with males .\\nthis difference in the impact of nutritional interventions between male and female might be related to the different age of puberty in the female population compared to the male population . in the age range of the present study female\\nalthough , there is no nffp in big cities of iran , there are some programs for improving the nutritional status such as providing free milk in schools .\\na recent publication has shown that school feeding programs focus on milk supplementation had beneficial effects on the physical function and school performances specifically among girls in iran .\\nthe results of the mentioned study showed an improvement in the weight of children , psychological test 's scores and the grade - point average following this school feeding program .\\nthe intervention in the present study had focused on the snack intake in the school time .\\nthere are some reports regarding the nutrition transition in iran , which shows the importance of nutrition intervention to provide more healthy eating dietary habits among welfare groups of adolescents .\\nhence , nutrition intervention especially in the form of nutrition education is needed in big cities and among welfare children and adolescents . although a study among iranian adolescents showed that dietary behavior of adolescents does not accord to their knowledge , which emphasize on the necessity of community intervention programs . a recent study regarding the major dietary pattern among iranian children showed the presence of four major dietary patterns , in which fast food pattern and sweet pattern as two major dietary patterns can be mentioned among iranian children . in advocacy program audience 's analysis\\naccordingly , one of the prominent strategies in this study was working with media and was meeting with parent - teacher association that both of them were secondary target audiences\\n. we also took into account policy makers in different levels , from national to local as primary audiences .\\nadvocacy team had several meetings with management and planning organization at national level and education organization of the fars province as well as principal of the targeted schools .\\nproviding nutritious snacks need contribution of private sector such as food industries or factories , but their benefits should be warranted .\\nanother choice was community involvement ; which can be achieved by female health volunteers who are working with the health system .\\nadvocacy team by using the support of charities and female health volunteers could establish a local factory that produced student 's snacks based on the new definition . however , there are some challenges on the way of expanding this program .\\nmass production of the proposed snacks according to different desires and cultures and getting involvement of food industries with respect to marketing issues is one of those challenges .\\nmoreover , providing a supportive environment in order to change the food habits of the students and their parents among the wide range of the population require a sustainable and continuous inter - sector collaboration .\\nalthough in a limited number of schools , in our study , interventions and advocacy program was successful , expanding this model to another areas around the country depends on convincing the policy makers at national level . in this\\nregard , advocacy team should prepare evidenced based profile and transitional planning to convince the policy makers for improving the rule and regulation of nffp .\\nthe same as this study in other studies have also emphasized that there must be efforts to strengthen the capacity within the schools to deal with the nutritional problems either overweight , obesity or malnutrition by using of educational and nutritional intervention .\\nassessing the dietary adherence is very important in nutrition intervention among population . as this population was children and adolescents we had a limitation in the blood sample collection to assess the subject 's dietary adherence .\\nfurthermore , this intervention was only focused on the intake of snack in school time and we did not have comprehensive information on the dietary intake of children and adolescents after school all over the day .\\nthe investigators propose further investigation in different areas of the country based on socio - cultural differences in order to make necessary modification and adapt this model to other areas .\\nregarding the nutritional needs of the school age children , provision of a good platform for implementing and expanding this efficient model to the whole country based upon the socio - economic situation of each region is advisable to the moh and the moe .\\ncommunity nutrition intervention based on the advocacy process model is effective on reducing the prevalence of underweight specifically among female school aged children .\", 'abstract': \" background : the present study was carried out to assess the effects of community nutrition intervention based on advocacy approach on malnutrition status among school - aged children in shiraz , iran.materials and methods : this case - control nutritional intervention has been done between 2008 and 2009 on 2897 primary and secondary school boys and girls ( 7 - 13 years old ) based on advocacy approach in shiraz , iran . \\n the project provided nutritious snacks in public schools over a 2-year period along with advocacy oriented actions in order to implement and promote nutritional intervention . for evaluation of effectiveness of the intervention growth monitoring indices of pre- and post - intervention were statistically compared.results:the frequency of subjects with body mass index lower than 5% decreased significantly after intervention among girls ( p = 0.02 ) . \\n however , there were no significant changes among boys or total population . \\n the mean of all anthropometric indices changed significantly after intervention both among girls and boys as well as in total population . \\n the pre- and post - test education assessment in both groups showed that the student 's average knowledge score has been significantly increased from 12.5  3.2 to 16.8  4.3 ( p < 0.0001).conclusion : this study demonstrates the potential success and scalability of school feeding programs in iran . \\n community nutrition intervention based on the advocacy process model is effective on reducing the prevalence of underweight specifically among female school aged children . \", 'section_names': 'INTRODUCTION\\nMATERIALS AND METHODS\\nParticipants\\nInstruments\\nProcedure\\nFirst step\\nSecond step\\nThird step\\nForth step\\nInterventions\\nFifth step (assessment)\\nData analysis\\nRESULTS\\nDISCUSSION\\nCONCLUSION'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Peft/LORA Code to train***"
      ],
      "metadata": {
        "id": "G_V8sckwD6rD"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seeds and deterministic pytorch for reproducibility\n",
        "#torch.manual_seed(42) # pytorch random seed\n",
        "np.random.seed(42) # numpy random seed\n",
        "torch.backends.cudnn.deterministic = True\n",
        "\n",
        "# tokenzier for encoding the text\n",
        "tokenizer = T5Tokenizer.from_pretrained(\"t5-base\")\n",
        "\n",
        "\n",
        "MAX_LEN = 700\n",
        "SUMMARY_LEN = 150\n",
        "# Creating the Training and Validation dataset for further creation of Dataloader\n",
        "training_set = CustomDataset(df_train, tokenizer, MAX_LEN, SUMMARY_LEN)\n",
        "val_set = CustomDataset(df_val, tokenizer, MAX_LEN, SUMMARY_LEN)\n",
        "BATCHSIZE = 7\n",
        "# Defining the parameters for creation of dataloaders\n",
        "train_params = {\n",
        "    'batch_size': BATCHSIZE,\n",
        "    'shuffle': True,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "val_params = {\n",
        "    'batch_size': BATCHSIZE,\n",
        "    'shuffle': False,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "\n",
        "# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.\n",
        "training_loader = DataLoader(training_set, **train_params)\n",
        "val_loader = DataLoader(val_set, **val_params)\n",
        "\n",
        "# define configuration for peft (source: https://www.philschmid.de/fine-tune-flan-t5-peft, https://huggingface.co/blog/peft)\n",
        "peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,\n",
        "                               inference_mode=False,\n",
        "                               r=8,\n",
        "                               lora_alpha=32,\n",
        "                               lora_dropout=0.1)\n",
        "# lora_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,\n",
        "#                               inference_mode=False,\n",
        "#                               r=16,\n",
        "#                               bias='lora_only',\n",
        "#                               use_rslora=True,\n",
        "#                               lora_dropout=0.1,\n",
        "#                               )\n",
        "\n",
        "# Defining the  PEFT model. We are using t5-base model and added a Language model layer on top for generation of Summary.\n",
        "# Further this model is sent to device (GPU/TPU) for using the hardware.\n",
        "model = get_peft_model(T5ForConditionalGeneration.from_pretrained('t5-base',output_hidden_states=True), peft_config)\n",
        "model.to(device)\n",
        "\n",
        "# take a peek at the peft model for comparison to non-peft\n",
        "print(f\"Preview of trainable parameters in the peft model\\n\")\n",
        "model.print_trainable_parameters()\n",
        "\n",
        "# Defining the optimizer that will be used to tune the weights of the network in the training session.\n",
        "optimizer = torch.optim.Adam(params =  model.parameters(), lr=3e-4) # 1e-4 ended with loss Loss:  2.074270486831665, now trying e-5 -- got 2.3540332317352295 ---> so far best lr seems to be 1e-4. 3e-4 got sub 2 loss, so pretty decent\n",
        "\n",
        "#summary(model, input_size=[(batch_size, input_length)])\n",
        "#summary(model, input_size=[(8, 6)])\n",
        "# print (model)\n",
        "\n",
        "\n",
        "EPOCHS = 1\n",
        "trainer = T5AbstractsTrainer(tokenizer, model, device, optimizer)\n",
        "for epoch in range(EPOCHS):\n",
        "    trainer.train(epoch, training_loader)\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "#for epoch in range(EPOCHS):\n",
        "#        train(epoch, tokenizer, model, device, training_loader, optimizer)\n",
        "\n",
        "\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "L83qp1qTqKwD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "df86858a-80b9-4ff0-9ad2-a4e721d88dd7"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
            "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Preview of trainable parameters in the peft model\n",
            "\n",
            "trainable params: 884,736 || all params: 223,788,288 || trainable%: 0.3953450861557152\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2645: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "RUN: 0\n",
            "\t[2024-04-06 14:18:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 7.345491409301758\n",
            "RUN: 100\n",
            "\t[2024-04-06 14:19:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5926971435546875\n",
            "RUN: 200\n",
            "\t[2024-04-06 14:20:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.7770352363586426\n",
            "RUN: 300\n",
            "\t[2024-04-06 14:20:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.458217144012451\n",
            "RUN: 400\n",
            "\t[2024-04-06 14:21:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.6213810443878174\n",
            "RUN: 500\n",
            "\t[2024-04-06 14:22:07 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.6450307369232178\n",
            "RUN: 600\n",
            "\t[2024-04-06 14:22:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1673409938812256\n",
            "RUN: 700\n",
            "\t[2024-04-06 14:23:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.455289363861084\n",
            "RUN: 800\n",
            "\t[2024-04-06 14:24:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2111077308654785\n",
            "RUN: 900\n",
            "\t[2024-04-06 14:24:44 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5103116035461426\n",
            "RUN: 1000\n",
            "\t[2024-04-06 14:25:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1050968170166016\n",
            "RUN: 1100\n",
            "\t[2024-04-06 14:26:01 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4426348209381104\n",
            "RUN: 1200\n",
            "\t[2024-04-06 14:26:40 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4519267082214355\n",
            "RUN: 1300\n",
            "\t[2024-04-06 14:27:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.894439458847046\n",
            "RUN: 1400\n",
            "\t[2024-04-06 14:27:57 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.7471885681152344\n",
            "RUN: 1500\n",
            "\t[2024-04-06 14:28:36 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1343002319335938\n",
            "RUN: 1600\n",
            "\t[2024-04-06 14:29:15 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.479642868041992\n",
            "RUN: 1700\n",
            "\t[2024-04-06 14:29:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.8243008852005005\n",
            "RUN: 1800\n",
            "\t[2024-04-06 14:30:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2866976261138916\n",
            "RUN: 1900\n",
            "\t[2024-04-06 14:31:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0868868827819824\n",
            "RUN: 2000\n",
            "\t[2024-04-06 14:31:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2388217449188232\n",
            "RUN: 2100\n",
            "\t[2024-04-06 14:32:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.6250971555709839\n",
            "RUN: 2200\n",
            "\t[2024-04-06 14:33:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1125521659851074\n",
            "RUN: 2300\n",
            "\t[2024-04-06 14:33:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1230716705322266\n",
            "RUN: 2400\n",
            "\t[2024-04-06 14:34:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.990001916885376\n",
            "RUN: 2500\n",
            "\t[2024-04-06 14:35:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.7773154973983765\n",
            "RUN: 2600\n",
            "\t[2024-04-06 14:35:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.109410524368286\n",
            "RUN: 2700\n",
            "\t[2024-04-06 14:36:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4853622913360596\n",
            "RUN: 2800\n",
            "\t[2024-04-06 14:37:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1715009212493896\n",
            "RUN: 2900\n",
            "\t[2024-04-06 14:37:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9738967418670654\n",
            "RUN: 3000\n",
            "\t[2024-04-06 14:38:18 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9068458080291748\n",
            "RUN: 3100\n",
            "\t[2024-04-06 14:38:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.6270484924316406\n",
            "RUN: 3200\n",
            "\t[2024-04-06 14:39:37 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.5709248781204224\n",
            "RUN: 3300\n",
            "\t[2024-04-06 14:40:15 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9725146293640137\n",
            "RUN: 3400\n",
            "\t[2024-04-06 14:40:55 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2789676189422607\n",
            "RUN: 3500\n",
            "\t[2024-04-06 14:41:34 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.626394748687744\n",
            "RUN: 3600\n",
            "\t[2024-04-06 14:42:13 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.222804069519043\n",
            "RUN: 3700\n",
            "\t[2024-04-06 14:42:51 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9746259450912476\n",
            "RUN: 3800\n",
            "\t[2024-04-06 14:43:30 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.444126844406128\n",
            "RUN: 3900\n",
            "\t[2024-04-06 14:44:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.332143545150757\n",
            "RUN: 4000\n",
            "\t[2024-04-06 14:44:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.070868730545044\n",
            "RUN: 4100\n",
            "\t[2024-04-06 14:45:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.420790433883667\n",
            "RUN: 4200\n",
            "\t[2024-04-06 14:46:06 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.710693597793579\n",
            "RUN: 4300\n",
            "\t[2024-04-06 14:46:44 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1878812313079834\n",
            "RUN: 4400\n",
            "\t[2024-04-06 14:47:23 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.6605942249298096\n",
            "RUN: 4500\n",
            "\t[2024-04-06 14:48:02 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.505498170852661\n",
            "RUN: 4600\n",
            "\t[2024-04-06 14:48:40 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.094806671142578\n",
            "RUN: 4700\n",
            "\t[2024-04-06 14:49:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2929141521453857\n",
            "RUN: 4800\n",
            "\t[2024-04-06 14:49:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.8721275329589844\n",
            "RUN: 4900\n",
            "\t[2024-04-06 14:50:36 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.555238723754883\n",
            "RUN: 5000\n",
            "\t[2024-04-06 14:51:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.942868709564209\n",
            "RUN: 5100\n",
            "\t[2024-04-06 14:51:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5061867237091064\n",
            "RUN: 5200\n",
            "\t[2024-04-06 14:52:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4272067546844482\n",
            "RUN: 5300\n",
            "\t[2024-04-06 14:53:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.8751022815704346\n",
            "RUN: 5400\n",
            "\t[2024-04-06 14:53:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2831008434295654\n",
            "RUN: 5500\n",
            "\t[2024-04-06 14:54:28 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5547640323638916\n",
            "RUN: 5600\n",
            "\t[2024-04-06 14:55:07 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.096998929977417\n",
            "RUN: 5700\n",
            "\t[2024-04-06 14:55:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.764601707458496\n",
            "RUN: 5800\n",
            "\t[2024-04-06 14:56:24 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0306177139282227\n",
            "RUN: 5900\n",
            "\t[2024-04-06 14:57:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.328599691390991\n",
            "RUN: 6000\n",
            "\t[2024-04-06 14:57:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0558722019195557\n",
            "RUN: 6100\n",
            "\t[2024-04-06 14:58:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.136464834213257\n",
            "RUN: 6200\n",
            "\t[2024-04-06 14:58:59 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.6811892986297607\n",
            "RUN: 6300\n",
            "\t[2024-04-06 14:59:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4783194065093994\n",
            "RUN: 6400\n",
            "\t[2024-04-06 15:00:17 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2842166423797607\n",
            "RUN: 6500\n",
            "\t[2024-04-06 15:00:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1190028190612793\n",
            "RUN: 6600\n",
            "\t[2024-04-06 15:01:34 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.8757424354553223\n",
            "RUN: 6700\n",
            "\t[2024-04-06 15:02:13 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.17549204826355\n",
            "RUN: 6800\n",
            "\t[2024-04-06 15:02:52 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.309190034866333\n",
            "RUN: 6900\n",
            "\t[2024-04-06 15:03:30 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0022590160369873\n",
            "RUN: 7000\n",
            "\t[2024-04-06 15:04:09 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5449843406677246\n",
            "RUN: 7100\n",
            "\t[2024-04-06 15:04:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.442486047744751\n",
            "RUN: 7200\n",
            "\t[2024-04-06 15:05:26 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3012547492980957\n",
            "RUN: 7300\n",
            "\t[2024-04-06 15:06:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.315365791320801\n",
            "RUN: 7400\n",
            "\t[2024-04-06 15:06:43 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.293565511703491\n",
            "RUN: 7500\n",
            "\t[2024-04-06 15:07:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3204219341278076\n",
            "RUN: 7600\n",
            "\t[2024-04-06 15:08:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 3.0721945762634277\n",
            "RUN: 7700\n",
            "\t[2024-04-06 15:08:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4089126586914062\n",
            "RUN: 7800\n",
            "\t[2024-04-06 15:09:18 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.14528751373291\n",
            "RUN: 7900\n",
            "\t[2024-04-06 15:09:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.031372547149658\n",
            "RUN: 8000\n",
            "\t[2024-04-06 15:10:35 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4208362102508545\n",
            "RUN: 8100\n",
            "\t[2024-04-06 15:11:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.787327527999878\n",
            "RUN: 8200\n",
            "\t[2024-04-06 15:11:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3338613510131836\n",
            "RUN: 8300\n",
            "\t[2024-04-06 15:12:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.7846585512161255\n",
            "RUN: 8400\n",
            "\t[2024-04-06 15:13:10 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.218531847000122\n",
            "RUN: 8500\n",
            "\t[2024-04-06 15:13:49 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.251535654067993\n",
            "RUN: 8600\n",
            "\t[2024-04-06 15:14:28 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.234365224838257\n",
            "RUN: 8700\n",
            "\t[2024-04-06 15:15:06 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9417508840560913\n",
            "RUN: 8800\n",
            "\t[2024-04-06 15:15:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.108081579208374\n",
            "RUN: 8900\n",
            "\t[2024-04-06 15:16:24 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.087236166000366\n",
            "RUN: 9000\n",
            "\t[2024-04-06 15:17:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9784916639328003\n",
            "RUN: 9100\n",
            "\t[2024-04-06 15:17:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0951807498931885\n",
            "RUN: 9200\n",
            "\t[2024-04-06 15:18:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5239052772521973\n",
            "RUN: 9300\n",
            "\t[2024-04-06 15:18:59 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.076720714569092\n",
            "RUN: 9400\n",
            "\t[2024-04-06 15:19:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9608899354934692\n",
            "RUN: 9500\n",
            "\t[2024-04-06 15:20:17 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.147498607635498\n",
            "RUN: 9600\n",
            "\t[2024-04-06 15:20:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.155272960662842\n",
            "RUN: 9700\n",
            "\t[2024-04-06 15:21:35 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9340800046920776\n",
            "RUN: 9800\n",
            "\t[2024-04-06 15:22:13 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2947983741760254\n",
            "RUN: 9900\n",
            "\t[2024-04-06 15:22:52 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9517340660095215\n",
            "RUN: 10000\n",
            "\t[2024-04-06 15:23:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.200854539871216\n",
            "RUN: 10100\n",
            "\t[2024-04-06 15:24:09 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3570566177368164\n",
            "RUN: 10200\n",
            "\t[2024-04-06 15:24:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.718507170677185\n",
            "RUN: 10300\n",
            "\t[2024-04-06 15:25:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.132185697555542\n",
            "RUN: 10400\n",
            "\t[2024-04-06 15:26:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.295841693878174\n",
            "RUN: 10500\n",
            "\t[2024-04-06 15:26:44 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.270869731903076\n",
            "RUN: 10600\n",
            "\t[2024-04-06 15:27:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.363208055496216\n",
            "RUN: 10700\n",
            "\t[2024-04-06 15:28:01 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2236804962158203\n",
            "RUN: 10800\n",
            "\t[2024-04-06 15:28:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.875204086303711\n",
            "RUN: 10900\n",
            "\t[2024-04-06 15:29:18 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0718767642974854\n",
            "RUN: 11000\n",
            "\t[2024-04-06 15:29:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9058114290237427\n",
            "RUN: 11100\n",
            "\t[2024-04-06 15:30:35 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9947818517684937\n",
            "RUN: 11200\n",
            "\t[2024-04-06 15:31:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4971275329589844\n",
            "RUN: 11300\n",
            "\t[2024-04-06 15:31:52 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.112452268600464\n",
            "RUN: 11400\n",
            "\t[2024-04-06 15:32:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4227683544158936\n",
            "RUN: 11500\n",
            "\t[2024-04-06 15:33:10 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4685354232788086\n",
            "RUN: 11600\n",
            "\t[2024-04-06 15:33:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0248069763183594\n",
            "RUN: 11700\n",
            "\t[2024-04-06 15:34:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.149091958999634\n",
            "RUN: 11800\n",
            "\t[2024-04-06 15:35:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.5769801139831543\n",
            "RUN: 11900\n",
            "\t[2024-04-06 15:35:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2936229705810547\n",
            "RUN: 12000\n",
            "\t[2024-04-06 15:36:23 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1652474403381348\n",
            "RUN: 12100\n",
            "\t[2024-04-06 15:37:02 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4616682529449463\n",
            "RUN: 12200\n",
            "\t[2024-04-06 15:37:41 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.100414752960205\n",
            "RUN: 12300\n",
            "\t[2024-04-06 15:38:20 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.192965269088745\n",
            "RUN: 12400\n",
            "\t[2024-04-06 15:38:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9227285385131836\n",
            "RUN: 12500\n",
            "\t[2024-04-06 15:39:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.059441328048706\n",
            "RUN: 12600\n",
            "\t[2024-04-06 15:40:16 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.016479730606079\n",
            "RUN: 12700\n",
            "\t[2024-04-06 15:40:55 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.782459020614624\n",
            "RUN: 12800\n",
            "\t[2024-04-06 15:41:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.025508403778076\n",
            "RUN: 12900\n",
            "\t[2024-04-06 15:42:12 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.275740623474121\n",
            "RUN: 13000\n",
            "\t[2024-04-06 15:42:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.7683725357055664\n",
            "RUN: 13100\n",
            "\t[2024-04-06 15:43:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.7522331476211548\n",
            "RUN: 13200\n",
            "\t[2024-04-06 15:44:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2519843578338623\n",
            "RUN: 13300\n",
            "\t[2024-04-06 15:44:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.7400994300842285\n",
            "RUN: 13400\n",
            "\t[2024-04-06 15:45:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2947728633880615\n",
            "RUN: 13500\n",
            "\t[2024-04-06 15:46:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.110961437225342\n",
            "RUN: 13600\n",
            "\t[2024-04-06 15:46:43 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9077165126800537\n",
            "RUN: 13700\n",
            "\t[2024-04-06 15:47:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.975692629814148\n",
            "RUN: 13800\n",
            "\t[2024-04-06 15:48:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.7291665077209473\n",
            "RUN: 13900\n",
            "\t[2024-04-06 15:48:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.5352158546447754\n",
            "RUN: 14000\n",
            "\t[2024-04-06 15:49:18 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.851457118988037\n",
            "RUN: 14100\n",
            "\t[2024-04-06 15:49:57 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3056094646453857\n",
            "RUN: 14200\n",
            "\t[2024-04-06 15:50:35 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.4367308616638184\n",
            "RUN: 14300\n",
            "\t[2024-04-06 15:51:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.972453236579895\n",
            "RUN: 14400\n",
            "\t[2024-04-06 15:51:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.6381170749664307\n",
            "RUN: 14500\n",
            "\t[2024-04-06 15:52:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.13793683052063\n",
            "RUN: 14600\n",
            "\t[2024-04-06 15:53:10 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.258348226547241\n",
            "RUN: 14700\n",
            "\t[2024-04-06 15:53:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9490060806274414\n",
            "RUN: 14800\n",
            "\t[2024-04-06 15:54:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.887129545211792\n",
            "RUN: 14900\n",
            "\t[2024-04-06 15:55:06 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.079538106918335\n",
            "RUN: 15000\n",
            "\t[2024-04-06 15:55:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.507418155670166\n",
            "RUN: 15100\n",
            "\t[2024-04-06 15:56:23 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.6581478118896484\n",
            "RUN: 15200\n",
            "\t[2024-04-06 15:57:02 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.7069363594055176\n",
            "RUN: 15300\n",
            "\t[2024-04-06 15:57:41 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.152655601501465\n",
            "RUN: 15400\n",
            "\t[2024-04-06 15:58:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.70895254611969\n",
            "RUN: 15500\n",
            "\t[2024-04-06 15:58:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1890311241149902\n",
            "RUN: 15600\n",
            "\t[2024-04-06 15:59:36 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.20723819732666\n",
            "RUN: 15700\n",
            "\t[2024-04-06 16:00:15 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1967313289642334\n",
            "RUN: 15800\n",
            "\t[2024-04-06 16:00:54 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.155057430267334\n",
            "RUN: 15900\n",
            "\t[2024-04-06 16:01:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.259883403778076\n",
            "RUN: 16000\n",
            "\t[2024-04-06 16:02:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.9070746898651123\n",
            "RUN: 16100\n",
            "\t[2024-04-06 16:02:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1307034492492676\n",
            "RUN: 16200\n",
            "\t[2024-04-06 16:03:28 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.0917117595672607\n",
            "RUN: 16300\n",
            "\t[2024-04-06 16:04:07 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 1.774440050125122\n",
            "RUN: 16400\n",
            "\t[2024-04-06 16:04:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.3799731731414795\n",
            "RUN: 16500\n",
            "\t[2024-04-06 16:05:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1690852642059326\n",
            "RUN: 16600\n",
            "\t[2024-04-06 16:06:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1154837608337402\n",
            "RUN: 16700\n",
            "\t[2024-04-06 16:06:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.1110997200012207\n",
            "RUN: 16800\n",
            "\t[2024-04-06 16:07:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.37376070022583\n",
            "RUN: 16900\n",
            "\t[2024-04-06 16:08:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.2258262634277344\n",
            "RUN: 17000\n",
            "\t[2024-04-06 16:08:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.7228713035583496\n",
            "RUN: 17100\n",
            "\t[2024-04-06 16:09:17 PDT-0700]\n",
            "Epoch: 0, Batch Size: 7, Loss: 2.028247117996216\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Testing save to hugging face function"
      ],
      "metadata": {
        "id": "b_vn6morvvAa"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# save_to_hf(trainer.model, tokenizer, model_name='t5_base_peft')"
      ],
      "metadata": {
        "id": "Wms7VAcY0T_x",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c014114d-1082-46e7-b04d-ffda32b3b646"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2024-04-06 23:09:35.453978: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "2024-04-06 23:09:35.454022: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "2024-04-06 23:09:35.455334: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "2024-04-06 23:09:36.450006: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "\u001b[1m\u001b[31mERROR! `huggingface-cli login` uses an outdated login mechanism that is not compatible with the Hugging Face Hub backend anymore. Please use `huggingface-cli login instead.\u001b[0m\n",
            "2024-04-06 23:09:45.318460: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
            "2024-04-06 23:09:45.318529: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
            "2024-04-06 23:09:45.319754: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
            "2024-04-06 23:09:46.323826: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
            "usage: transformers-cli <command> [<args>]\n",
            "Transformers CLI tool: error: argument {convert,download,env,run,serve,login,whoami,logout,repo,add-new-model,add-new-model-like,lfs-enable-largefiles,lfs-multipart-upload,pt-to-tf}: invalid choice: 'upload' (choose from 'convert', 'download', 'env', 'run', 'serve', 'login', 'whoami', 'logout', 'repo', 'add-new-model', 'add-new-model-like', 'lfs-enable-largefiles', 'lfs-multipart-upload', 'pt-to-tf')\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
        "\n",
        "# load model from huggingface\n",
        "hf_model = AutoModelForSeq2SeqLM.from_pretrained(\"dsolomon/t5_base_peft\")\n",
        "hf_tokenizer = AutoTokenizer.from_pretrained(\"dsolomon/t5_base_peft\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "55ltE0rMrpy7",
        "outputId": "92d0b339-9376-433a-a789-a86d8b345f7b"
      },
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "False"
            ]
          },
          "metadata": {},
          "execution_count": 43
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Validate***"
      ],
      "metadata": {
        "id": "IF0uxoRnqVJZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def validate(epoch, tokenizer, model, device, loader):\n",
        "    model.eval()\n",
        "    predictions = []\n",
        "    actuals = []\n",
        "    with torch.no_grad():\n",
        "        for _, data in enumerate(loader, 0):\n",
        "            y = data['target_ids'].to(device, dtype = torch.long)\n",
        "            ids = data['source_ids'].to(device, dtype = torch.long)\n",
        "            mask = data['source_mask'].to(device, dtype = torch.long)\n",
        "\n",
        "            generated_ids = model.generate(\n",
        "                input_ids = ids,\n",
        "                attention_mask = mask,\n",
        "                min_length=200,\n",
        "                max_length=350,\n",
        "                num_beams=4,\n",
        "                repetition_penalty=2.5,\n",
        "                length_penalty=1.0,\n",
        "                early_stopping=True\n",
        "                )\n",
        "            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]\n",
        "            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]\n",
        "            #if _%100==0:\n",
        "                #print(f'Completed {_}')\n",
        "\n",
        "            predictions.extend(preds)\n",
        "            actuals.extend(target)\n",
        "    return predictions, actuals\n",
        "\n"
      ],
      "metadata": {
        "id": "K2Ca4RsZqYcH"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Validate with Rouge score***"
      ],
      "metadata": {
        "id": "HQswnteHj4SZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "def validate(epoch, tokenizer, model, device, loader):\n",
        "    model.eval()\n",
        "    predictions = []\n",
        "    actuals = []\n",
        "    rouge_scores = {'rouge-1': {'f': [], 'p': [], 'r': []}, 'rouge-2': {'f': [], 'p': [], 'r': []}}  # Store ROUGE scores\n",
        "\n",
        "    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2'], use_stemmer=True)\n",
        "\n",
        "    with torch.no_grad():\n",
        "        for _, data in enumerate(loader, 0):\n",
        "            y = data['target_ids'].to(device, dtype=torch.long)\n",
        "            ids = data['source_ids'].to(device, dtype=torch.long)\n",
        "            mask = data['source_mask'].to(device, dtype=torch.long)\n",
        "\n",
        "            generated_ids = model.generate(\n",
        "                input_ids=ids,\n",
        "                attention_mask=mask,\n",
        "                min_length=200,\n",
        "                max_length=350,\n",
        "                num_beams=4,\n",
        "                repetition_penalty=2.5,\n",
        "                length_penalty=1.0,\n",
        "                early_stopping=True\n",
        "            )\n",
        "            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]\n",
        "            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True) for t in y]\n",
        "\n",
        "            # Compute ROUGE scores for each pair of predictions and targets\n",
        "            for pred, tgt in zip(preds, target):\n",
        "                scores = scorer.score(pred, tgt)\n",
        "                rouge_scores['rouge-1']['f'].append(scores['rouge1'].fmeasure)\n",
        "                rouge_scores['rouge-1']['p'].append(scores['rouge1'].precision)\n",
        "                rouge_scores['rouge-1']['r'].append(scores['rouge1'].recall)\n",
        "                rouge_scores['rouge-2']['f'].append(scores['rouge2'].fmeasure)\n",
        "                rouge_scores['rouge-2']['p'].append(scores['rouge2'].precision)\n",
        "                rouge_scores['rouge-2']['r'].append(scores['rouge2'].recall)\n",
        "\n",
        "            predictions.extend(preds)\n",
        "            actuals.extend(target)\n",
        "\n",
        "    return predictions, actuals, rouge_scores\n",
        "\n"
      ],
      "metadata": {
        "id": "e-QuORqIj7u9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### ***Generate Predictions***"
      ],
      "metadata": {
        "id": "wVEzi1OhqdhJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "final_df = {}\n",
        "\n",
        "#just to check the first one\n",
        "val_set = CustomDataset(df_val.head(1), tokenizer, 512, 50)\n",
        "print (\"HEAD is : \", df_val.head(1))\n",
        "val_loader = DataLoader(val_set, **val_params)\n",
        "\n",
        "\n",
        "\n",
        "for epoch in range(0,EPOCHS):\n",
        "  predictions, actuals, rscore = validate(epoch, tokenizer, model, device, val_loader)\n",
        "  final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})\n",
        "\n"
      ],
      "metadata": {
        "id": "xWAPLKZVqgq6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "cd89158b-1063-4006-e76e-3cc24e4ec055"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "HEAD is :                                               article  \\\n",
            "0  summarize: approximately , one - third of pati...   \n",
            "\n",
            "                                            abstract  \n",
            "0   background and aim : there is lack of substan...  \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2645: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Pvff_teL3DiM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(\"Hello\")\n",
        "print(final_df['Actual Text'])\n",
        "\n",
        "print(final_df['Generated Text'][:100])\n",
        "print(len(final_df['Generated Text']))\n",
        "print (final_df.info())\n",
        "print (final_df.iloc[0,0])\n",
        "print (len(final_df.iloc[0,0]))\n",
        "\n",
        "print(\"Rouge Score is: \", rscore)"
      ],
      "metadata": {
        "id": "xHR1P6Kx4N57",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "aff75e71-ad61-4719-d165-951773c2b944"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Hello\n",
            "0    background and aim : there is lack of substant...\n",
            "Name: Actual Text, dtype: object\n",
            "0    vte is one of the commonest causes of sudden u...\n",
            "Name: Generated Text, dtype: object\n",
            "1\n",
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 1 entries, 0 to 0\n",
            "Data columns (total 2 columns):\n",
            " #   Column          Non-Null Count  Dtype \n",
            "---  ------          --------------  ----- \n",
            " 0   Generated Text  1 non-null      object\n",
            " 1   Actual Text     1 non-null      object\n",
            "dtypes: object(2)\n",
            "memory usage: 144.0+ bytes\n",
            "None\n",
            "vte is one of the commonest causes of sudden unexplained deaths in hospitalized patients. it is not only disabling but also prolongs hospital stay and increases the cost of treatment. there was a need to systematically collect such data on patient characteristics, clinical outcomes, predictors of mortality in acute vte, management strategies and temporal trends in vte.materials and methodswe collected consecutive medical records of inpatients and outpatients between january 2006 and december 2010, meeting eligibility criteria ( confirmed diagnosis of acute or acute - on - chronic dvt by doppler ultrasound scan and/or electrolyte imbalance).resultsthe total number of participants were recorded at least 3 times per week for three days after surgery with no prophylaxis sessions ; pe = 0.150%= 1.133%*\n",
            "808\n",
            "Rouge Score is:  {'rouge-1': {'f': [0.1879194630872483], 'p': [0.5], 'r': [0.11570247933884298]}, 'rouge-2': {'f': [0.04081632653061225], 'p': [0.1111111111111111], 'r': [0.025]}}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "print(final_df['Actual Text'])\n",
        "\n",
        "\n",
        "print (len(final_df['Generated Text']))\n",
        "print(final_df['Generated Text'])\n"
      ],
      "metadata": {
        "id": "mIf9M25c2pna"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model"
      ],
      "metadata": {
        "id": "IV7YjA3SVWrZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "fVhHsf4cVjNw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Experimenting with a longer max length"
      ],
      "metadata": {
        "id": "QwBHxFZP3P0u"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seeds and deterministic pytorch for reproducibility\n",
        "torch.manual_seed(42) # pytorch random seed\n",
        "np.random.seed(42) # numpy random seed\n",
        "torch.backends.cudnn.deterministic = True\n",
        "\n",
        "# tokenzier for encoding the text\n",
        "tokenizer = T5Tokenizer.from_pretrained(\"t5-small\")\n",
        "\n",
        "# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n",
        "# DO NOT SET THESE TOO HIGH OR IT WILL ABSOLUTELY SHIT ON THE GPU AND YOU WILL HAVE TO RESTART THE NOTEBOOK\n",
        "# USAGE WITH THESE VALUES ARE ~7GB SYSTEM RAM AND 8.6GB GPU RAM\n",
        "source_len = 1000\n",
        "target_len = 200\n",
        "\n",
        "# Creating the Training and Validation dataset for further creation of Dataloader\n",
        "training_set = CustomDataset(df_train, tokenizer, source_len, target_len)\n",
        "val_set = CustomDataset(df_val, tokenizer, source_len, target_len)\n",
        "\n",
        "# Defining the parameters for creation of dataloaders\n",
        "train_params = {\n",
        "    'batch_size': 8,\n",
        "    'shuffle': True,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "val_params = {\n",
        "    'batch_size': 8,\n",
        "    'shuffle': False,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "\n",
        "# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.\n",
        "training_loader = DataLoader(training_set, **train_params)\n",
        "val_loader = DataLoader(val_set, **val_params)\n",
        "\n",
        "# define configuration for peft (source: https://www.philschmid.de/fine-tune-flan-t5-peft, https://huggingface.co/blog/peft)\n",
        "# peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,\n",
        "#                                inference_mode=False,\n",
        "#                                r=8,\n",
        "#                                lora_alpha=32,\n",
        "#                                lora_dropout=0.1)\n",
        "lora_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,\n",
        "                              inference_mode=False,\n",
        "                              r=16,\n",
        "                              bias='lora_only',\n",
        "                              use_rslora=True,\n",
        "                              lora_dropout=0.1,\n",
        "                              )\n",
        "\n",
        "# Defining the  PEFT model. We are using t5-base model and added a Language model layer on top for generation of Summary.\n",
        "# Further this model is sent to device (GPU/TPU) for using the hardware.\n",
        "model = get_peft_model(T5ForConditionalGeneration.from_pretrained('t5-small',output_hidden_states=True), peft_config)\n",
        "model.to(device)\n",
        "\n",
        "# take a peek at the peft model for comparison to non-peft\n",
        "print(f\"Preview of trainable parameters in the peft model\\n\")\n",
        "model.print_trainable_parameters()\n",
        "\n",
        "# Defining the optimizer that will be used to tune the weights of the network in the training session.\n",
        "optimizer = torch.optim.Adam(params =  model.parameters(), lr=3e-4) # 1e-4 ended with loss Loss:  2.074270486831665, now trying e-5 -- got 2.3540332317352295 ---> so far best lr seems to be 1e-4. 3e-4 got sub 2 loss, so pretty decent\n",
        "\n",
        "#summary(model, input_size=[(batch_size, input_length)])\n",
        "#summary(model, input_size=[(8, 6)])\n",
        "# print (model)\n",
        "\n",
        "\n",
        "EPOCHS = 1\n",
        "trainer = T5AbstractsTrainer(tokenizer, model, device, optimizer)\n",
        "for epoch in range(EPOCHS):\n",
        "    trainer.train(epoch, training_loader)\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "#for epoch in range(EPOCHS):\n",
        "#        train(epoch, tokenizer, model, device, training_loader, optimizer)\n",
        "\n",
        "\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QftATmRu3PUj",
        "outputId": "75f94afc-66ba-4432-8193-73487c6235bf"
      },
      "execution_count": null,
      "outputs": [
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
            "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Preview of trainable parameters in the peft model\n",
            "\n",
            "trainable params: 294,912 || all params: 60,801,536 || trainable%: 0.4850403779272945\n",
            "PeftModelForSeq2SeqLM(\n",
            "  (base_model): LoraModel(\n",
            "    (model): T5ForConditionalGeneration(\n",
            "      (shared): Embedding(32128, 512)\n",
            "      (encoder): T5Stack(\n",
            "        (embed_tokens): Embedding(32128, 512)\n",
            "        (block): ModuleList(\n",
            "          (0): T5Block(\n",
            "            (layer): ModuleList(\n",
            "              (0): T5LayerSelfAttention(\n",
            "                (SelfAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (relative_attention_bias): Embedding(32, 8)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (1): T5LayerFF(\n",
            "                (DenseReluDense): T5DenseActDense(\n",
            "                  (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
            "                  (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
            "                  (dropout): Dropout(p=0.1, inplace=False)\n",
            "                  (act): ReLU()\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "            )\n",
            "          )\n",
            "          (1-5): 5 x T5Block(\n",
            "            (layer): ModuleList(\n",
            "              (0): T5LayerSelfAttention(\n",
            "                (SelfAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (1): T5LayerFF(\n",
            "                (DenseReluDense): T5DenseActDense(\n",
            "                  (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
            "                  (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
            "                  (dropout): Dropout(p=0.1, inplace=False)\n",
            "                  (act): ReLU()\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "            )\n",
            "          )\n",
            "        )\n",
            "        (final_layer_norm): T5LayerNorm()\n",
            "        (dropout): Dropout(p=0.1, inplace=False)\n",
            "      )\n",
            "      (decoder): T5Stack(\n",
            "        (embed_tokens): Embedding(32128, 512)\n",
            "        (block): ModuleList(\n",
            "          (0): T5Block(\n",
            "            (layer): ModuleList(\n",
            "              (0): T5LayerSelfAttention(\n",
            "                (SelfAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (relative_attention_bias): Embedding(32, 8)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (1): T5LayerCrossAttention(\n",
            "                (EncDecAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (2): T5LayerFF(\n",
            "                (DenseReluDense): T5DenseActDense(\n",
            "                  (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
            "                  (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
            "                  (dropout): Dropout(p=0.1, inplace=False)\n",
            "                  (act): ReLU()\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "            )\n",
            "          )\n",
            "          (1-5): 5 x T5Block(\n",
            "            (layer): ModuleList(\n",
            "              (0): T5LayerSelfAttention(\n",
            "                (SelfAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (1): T5LayerCrossAttention(\n",
            "                (EncDecAttention): T5Attention(\n",
            "                  (q): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (k): Linear(in_features=512, out_features=512, bias=False)\n",
            "                  (v): lora.Linear(\n",
            "                    (base_layer): Linear(in_features=512, out_features=512, bias=False)\n",
            "                    (lora_dropout): ModuleDict(\n",
            "                      (default): Dropout(p=0.1, inplace=False)\n",
            "                    )\n",
            "                    (lora_A): ModuleDict(\n",
            "                      (default): Linear(in_features=512, out_features=8, bias=False)\n",
            "                    )\n",
            "                    (lora_B): ModuleDict(\n",
            "                      (default): Linear(in_features=8, out_features=512, bias=False)\n",
            "                    )\n",
            "                    (lora_embedding_A): ParameterDict()\n",
            "                    (lora_embedding_B): ParameterDict()\n",
            "                  )\n",
            "                  (o): Linear(in_features=512, out_features=512, bias=False)\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "              (2): T5LayerFF(\n",
            "                (DenseReluDense): T5DenseActDense(\n",
            "                  (wi): Linear(in_features=512, out_features=2048, bias=False)\n",
            "                  (wo): Linear(in_features=2048, out_features=512, bias=False)\n",
            "                  (dropout): Dropout(p=0.1, inplace=False)\n",
            "                  (act): ReLU()\n",
            "                )\n",
            "                (layer_norm): T5LayerNorm()\n",
            "                (dropout): Dropout(p=0.1, inplace=False)\n",
            "              )\n",
            "            )\n",
            "          )\n",
            "        )\n",
            "        (final_layer_norm): T5LayerNorm()\n",
            "        (dropout): Dropout(p=0.1, inplace=False)\n",
            "      )\n",
            "      (lm_head): Linear(in_features=512, out_features=32128, bias=False)\n",
            "    )\n",
            "  )\n",
            ")\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2645: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "RUN: 0\n",
            "\t[2024-04-03 00:55:40 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 5.606602668762207\n",
            "RUN: 100\n",
            "\t[2024-04-03 00:56:07 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9735965728759766\n",
            "RUN: 200\n",
            "\t[2024-04-03 00:56:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.890866279602051\n",
            "RUN: 300\n",
            "\t[2024-04-03 00:56:59 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.4168484210968018\n",
            "RUN: 400\n",
            "\t[2024-04-03 00:57:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.409773111343384\n",
            "RUN: 500\n",
            "\t[2024-04-03 00:57:51 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6389687061309814\n",
            "RUN: 600\n",
            "\t[2024-04-03 00:58:16 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.0353121757507324\n",
            "RUN: 700\n",
            "\t[2024-04-03 00:58:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7438268661499023\n",
            "RUN: 800\n",
            "\t[2024-04-03 00:59:09 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.5309488773345947\n",
            "RUN: 900\n",
            "\t[2024-04-03 00:59:35 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.0657596588134766\n",
            "RUN: 1000\n",
            "\t[2024-04-03 01:00:02 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.2870266437530518\n",
            "RUN: 1100\n",
            "\t[2024-04-03 01:00:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.606289863586426\n",
            "RUN: 1200\n",
            "\t[2024-04-03 01:00:55 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.573498010635376\n",
            "RUN: 1300\n",
            "\t[2024-04-03 01:01:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.8080382347106934\n",
            "RUN: 1400\n",
            "\t[2024-04-03 01:01:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.986100196838379\n",
            "RUN: 1500\n",
            "\t[2024-04-03 01:02:12 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.0960135459899902\n",
            "RUN: 1600\n",
            "\t[2024-04-03 01:02:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.640584707260132\n",
            "RUN: 1700\n",
            "\t[2024-04-03 01:03:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.105868101119995\n",
            "RUN: 1800\n",
            "\t[2024-04-03 01:03:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.881739377975464\n",
            "RUN: 1900\n",
            "\t[2024-04-03 01:03:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.006854772567749\n",
            "RUN: 2000\n",
            "\t[2024-04-03 01:04:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6896450519561768\n",
            "RUN: 2100\n",
            "\t[2024-04-03 01:04:49 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.857508420944214\n",
            "RUN: 2200\n",
            "\t[2024-04-03 01:05:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9823272228240967\n",
            "RUN: 2300\n",
            "\t[2024-04-03 01:05:41 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.475969076156616\n",
            "RUN: 2400\n",
            "\t[2024-04-03 01:06:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9213335514068604\n",
            "RUN: 2500\n",
            "\t[2024-04-03 01:06:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.851353406906128\n",
            "RUN: 2600\n",
            "\t[2024-04-03 01:06:59 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6765713691711426\n",
            "RUN: 2700\n",
            "\t[2024-04-03 01:07:26 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5350091457366943\n",
            "RUN: 2800\n",
            "\t[2024-04-03 01:07:52 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.0926151275634766\n",
            "RUN: 2900\n",
            "\t[2024-04-03 01:08:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.374559164047241\n",
            "RUN: 3000\n",
            "\t[2024-04-03 01:08:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.1417038440704346\n",
            "RUN: 3100\n",
            "\t[2024-04-03 01:09:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6612653732299805\n",
            "RUN: 3200\n",
            "\t[2024-04-03 01:09:36 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5269505977630615\n",
            "RUN: 3300\n",
            "\t[2024-04-03 01:10:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6219828128814697\n",
            "RUN: 3400\n",
            "\t[2024-04-03 01:10:28 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.798788547515869\n",
            "RUN: 3500\n",
            "\t[2024-04-03 01:10:54 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.328030586242676\n",
            "RUN: 3600\n",
            "\t[2024-04-03 01:11:20 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.600252866744995\n",
            "RUN: 3700\n",
            "\t[2024-04-03 01:11:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.3896756172180176\n",
            "RUN: 3800\n",
            "\t[2024-04-03 01:12:12 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4200026988983154\n",
            "RUN: 3900\n",
            "\t[2024-04-03 01:12:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4558138847351074\n",
            "RUN: 4000\n",
            "\t[2024-04-03 01:13:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4888086318969727\n",
            "RUN: 4100\n",
            "\t[2024-04-03 01:13:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5012738704681396\n",
            "RUN: 4200\n",
            "\t[2024-04-03 01:13:57 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.616546154022217\n",
            "RUN: 4300\n",
            "\t[2024-04-03 01:14:23 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.819748640060425\n",
            "RUN: 4400\n",
            "\t[2024-04-03 01:14:49 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7400612831115723\n",
            "RUN: 4500\n",
            "\t[2024-04-03 01:15:15 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.634427070617676\n",
            "RUN: 4600\n",
            "\t[2024-04-03 01:15:42 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7550437450408936\n",
            "RUN: 4700\n",
            "\t[2024-04-03 01:16:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.742583990097046\n",
            "RUN: 4800\n",
            "\t[2024-04-03 01:16:33 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.712233781814575\n",
            "RUN: 4900\n",
            "\t[2024-04-03 01:17:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4491829872131348\n",
            "RUN: 5000\n",
            "\t[2024-04-03 01:17:26 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.063829183578491\n",
            "RUN: 5100\n",
            "\t[2024-04-03 01:17:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.730354070663452\n",
            "RUN: 5200\n",
            "\t[2024-04-03 01:18:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5418028831481934\n",
            "RUN: 5300\n",
            "\t[2024-04-03 01:18:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.926936388015747\n",
            "RUN: 5400\n",
            "\t[2024-04-03 01:19:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.47989559173584\n",
            "RUN: 5500\n",
            "\t[2024-04-03 01:19:37 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.026101589202881\n",
            "RUN: 5600\n",
            "\t[2024-04-03 01:20:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.8358154296875\n",
            "RUN: 5700\n",
            "\t[2024-04-03 01:20:28 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.394108295440674\n",
            "RUN: 5800\n",
            "\t[2024-04-03 01:20:54 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.374922513961792\n",
            "RUN: 5900\n",
            "\t[2024-04-03 01:21:20 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.936565637588501\n",
            "RUN: 6000\n",
            "\t[2024-04-03 01:21:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.254088878631592\n",
            "RUN: 6100\n",
            "\t[2024-04-03 01:22:12 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.0774121284484863\n",
            "RUN: 6200\n",
            "\t[2024-04-03 01:22:38 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5209412574768066\n",
            "RUN: 6300\n",
            "\t[2024-04-03 01:23:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7391326427459717\n",
            "RUN: 6400\n",
            "\t[2024-04-03 01:23:30 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6077308654785156\n",
            "RUN: 6500\n",
            "\t[2024-04-03 01:23:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.746046781539917\n",
            "RUN: 6600\n",
            "\t[2024-04-03 01:24:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.994023323059082\n",
            "RUN: 6700\n",
            "\t[2024-04-03 01:24:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.155611038208008\n",
            "RUN: 6800\n",
            "\t[2024-04-03 01:25:13 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6661503314971924\n",
            "RUN: 6900\n",
            "\t[2024-04-03 01:25:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.0935468673706055\n",
            "RUN: 7000\n",
            "\t[2024-04-03 01:26:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6312992572784424\n",
            "RUN: 7100\n",
            "\t[2024-04-03 01:26:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.717634677886963\n",
            "RUN: 7200\n",
            "\t[2024-04-03 01:26:57 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9544122219085693\n",
            "RUN: 7300\n",
            "\t[2024-04-03 01:27:23 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7999749183654785\n",
            "RUN: 7400\n",
            "\t[2024-04-03 01:27:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.736011028289795\n",
            "RUN: 7500\n",
            "\t[2024-04-03 01:28:15 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5557117462158203\n",
            "RUN: 7600\n",
            "\t[2024-04-03 01:28:41 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.367460250854492\n",
            "RUN: 7700\n",
            "\t[2024-04-03 01:29:07 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.619030237197876\n",
            "RUN: 7800\n",
            "\t[2024-04-03 01:29:34 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.233703851699829\n",
            "RUN: 7900\n",
            "\t[2024-04-03 01:30:00 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6770310401916504\n",
            "RUN: 8000\n",
            "\t[2024-04-03 01:30:26 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.2643282413482666\n",
            "RUN: 8100\n",
            "\t[2024-04-03 01:30:52 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.934147596359253\n",
            "RUN: 8200\n",
            "\t[2024-04-03 01:31:18 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5340609550476074\n",
            "RUN: 8300\n",
            "\t[2024-04-03 01:31:43 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.3915598392486572\n",
            "RUN: 8400\n",
            "\t[2024-04-03 01:32:09 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.48177433013916\n",
            "RUN: 8500\n",
            "\t[2024-04-03 01:32:37 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6831798553466797\n",
            "RUN: 8600\n",
            "\t[2024-04-03 01:33:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.810021162033081\n",
            "RUN: 8700\n",
            "\t[2024-04-03 01:33:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4839463233947754\n",
            "RUN: 8800\n",
            "\t[2024-04-03 01:33:55 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.887545585632324\n",
            "RUN: 8900\n",
            "\t[2024-04-03 01:34:21 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6618003845214844\n",
            "RUN: 9000\n",
            "\t[2024-04-03 01:34:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4844439029693604\n",
            "RUN: 9100\n",
            "\t[2024-04-03 01:35:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6922528743743896\n",
            "RUN: 9200\n",
            "\t[2024-04-03 01:35:40 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.484701156616211\n",
            "RUN: 9300\n",
            "\t[2024-04-03 01:36:06 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9253411293029785\n",
            "RUN: 9400\n",
            "\t[2024-04-03 01:36:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5677759647369385\n",
            "RUN: 9500\n",
            "\t[2024-04-03 01:36:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.824228525161743\n",
            "RUN: 9600\n",
            "\t[2024-04-03 01:37:24 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.032925605773926\n",
            "RUN: 9700\n",
            "\t[2024-04-03 01:37:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.2605338096618652\n",
            "RUN: 9800\n",
            "\t[2024-04-03 01:38:17 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.497173309326172\n",
            "RUN: 9900\n",
            "\t[2024-04-03 01:38:44 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6151299476623535\n",
            "RUN: 10000\n",
            "\t[2024-04-03 01:39:10 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.154313325881958\n",
            "RUN: 10100\n",
            "\t[2024-04-03 01:39:36 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.1704325675964355\n",
            "RUN: 10200\n",
            "\t[2024-04-03 01:40:02 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.264756441116333\n",
            "RUN: 10300\n",
            "\t[2024-04-03 01:40:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6764838695526123\n",
            "RUN: 10400\n",
            "\t[2024-04-03 01:40:54 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.904367685317993\n",
            "RUN: 10500\n",
            "\t[2024-04-03 01:41:20 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.654892683029175\n",
            "RUN: 10600\n",
            "\t[2024-04-03 01:41:46 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.8638575077056885\n",
            "RUN: 10700\n",
            "\t[2024-04-03 01:42:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.349194288253784\n",
            "RUN: 10800\n",
            "\t[2024-04-03 01:42:37 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.529778003692627\n",
            "RUN: 10900\n",
            "\t[2024-04-03 01:43:04 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.8080191612243652\n",
            "RUN: 11000\n",
            "\t[2024-04-03 01:43:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 1.8376271724700928\n",
            "RUN: 11100\n",
            "\t[2024-04-03 01:43:57 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.0657739639282227\n",
            "RUN: 11200\n",
            "\t[2024-04-03 01:44:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4990525245666504\n",
            "RUN: 11300\n",
            "\t[2024-04-03 01:44:48 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7400810718536377\n",
            "RUN: 11400\n",
            "\t[2024-04-03 01:45:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7578577995300293\n",
            "RUN: 11500\n",
            "\t[2024-04-03 01:45:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5373787879943848\n",
            "RUN: 11600\n",
            "\t[2024-04-03 01:46:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.875976085662842\n",
            "RUN: 11700\n",
            "\t[2024-04-03 01:46:31 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.219557523727417\n",
            "RUN: 11800\n",
            "\t[2024-04-03 01:46:56 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5480756759643555\n",
            "RUN: 11900\n",
            "\t[2024-04-03 01:47:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.7873141765594482\n",
            "RUN: 12000\n",
            "\t[2024-04-03 01:47:47 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.59726881980896\n",
            "RUN: 12100\n",
            "\t[2024-04-03 01:48:13 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.418877363204956\n",
            "RUN: 12200\n",
            "\t[2024-04-03 01:48:39 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.747764825820923\n",
            "RUN: 12300\n",
            "\t[2024-04-03 01:49:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4009974002838135\n",
            "RUN: 12400\n",
            "\t[2024-04-03 01:49:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 1.971482276916504\n",
            "RUN: 12500\n",
            "\t[2024-04-03 01:49:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6216752529144287\n",
            "RUN: 12600\n",
            "\t[2024-04-03 01:50:24 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9950804710388184\n",
            "RUN: 12700\n",
            "\t[2024-04-03 01:50:50 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.465711832046509\n",
            "RUN: 12800\n",
            "\t[2024-04-03 01:51:16 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5276646614074707\n",
            "RUN: 12900\n",
            "\t[2024-04-03 01:51:43 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.2653439044952393\n",
            "RUN: 13000\n",
            "\t[2024-04-03 01:52:08 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.87477445602417\n",
            "RUN: 13100\n",
            "\t[2024-04-03 01:52:34 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.2494425773620605\n",
            "RUN: 13200\n",
            "\t[2024-04-03 01:53:01 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.0467398166656494\n",
            "RUN: 13300\n",
            "\t[2024-04-03 01:53:27 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.434393882751465\n",
            "RUN: 13400\n",
            "\t[2024-04-03 01:53:53 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6050217151641846\n",
            "RUN: 13500\n",
            "\t[2024-04-03 01:54:19 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.5973856449127197\n",
            "RUN: 13600\n",
            "\t[2024-04-03 01:54:45 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4734866619110107\n",
            "RUN: 13700\n",
            "\t[2024-04-03 01:55:11 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4022574424743652\n",
            "RUN: 13800\n",
            "\t[2024-04-03 01:55:37 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.284604787826538\n",
            "RUN: 13900\n",
            "\t[2024-04-03 01:56:03 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.3924460411071777\n",
            "RUN: 14000\n",
            "\t[2024-04-03 01:56:29 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.1184329986572266\n",
            "RUN: 14100\n",
            "\t[2024-04-03 01:56:55 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4028615951538086\n",
            "RUN: 14200\n",
            "\t[2024-04-03 01:57:22 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.831223726272583\n",
            "RUN: 14300\n",
            "\t[2024-04-03 01:57:49 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.6315062046051025\n",
            "RUN: 14400\n",
            "\t[2024-04-03 01:58:14 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 3.269657850265503\n",
            "RUN: 14500\n",
            "\t[2024-04-03 01:58:40 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.0901331901550293\n",
            "RUN: 14600\n",
            "\t[2024-04-03 01:59:05 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9382808208465576\n",
            "RUN: 14700\n",
            "\t[2024-04-03 01:59:32 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.4479732513427734\n",
            "RUN: 14800\n",
            "\t[2024-04-03 01:59:58 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.459540843963623\n",
            "RUN: 14900\n",
            "\t[2024-04-03 02:00:25 PDT-0700]\n",
            "Epoch: 0, Batch Size: 8, Loss: 2.9134011268615723\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vgPzBAZW4FU1",
        "outputId": "f8465ade-15de-4245-e65e-448387332cdb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Wed Apr  3 07:48:10 2024       \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n",
            "|-----------------------------------------+----------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                      |               MIG M. |\n",
            "|=========================================+======================+======================|\n",
            "|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   32C    P0              24W / 300W |      2MiB / 16384MiB |      0%      Default |\n",
            "|                                         |                      |                  N/A |\n",
            "+-----------------------------------------+----------------------+----------------------+\n",
            "                                                                                         \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                            |\n",
            "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
            "|        ID   ID                                                             Usage      |\n",
            "|=======================================================================================|\n",
            "|  No running processes found                                                           |\n",
            "+---------------------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# wipes the GPU memory without having to restart colab but now nothing works :(\n",
        "# !pip install numba\n",
        "\n",
        "# import numba\n",
        "# cuda_device = numba.cuda.get_current_device()\n",
        "# cuda_device.reset()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mxi3-ZQh65nU",
        "outputId": "8da832de-fcea-4d27-a0a1-6d9b040890d5"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (0.58.1)\n",
            "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba) (0.41.1)\n",
            "Requirement already satisfied: numpy<1.27,>=1.22 in /usr/local/lib/python3.10/dist-packages (from numba) (1.25.2)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Redefining T5AbstractsTrainer Class"
      ],
      "metadata": {
        "id": "XoIExUYS7cM6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "T5_MODEL = 'google-t5/t5-base'"
      ],
      "metadata": {
        "id": "574C0OvKIyPo"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# removing additional linear layer and tanh activation function\n",
        "# idea at this point is to finetune the t5 model on the data and then create another model that calls the pretrained t5 and then uses the outputs of the pretrained t5 to train additional layers on top of that\n",
        "\n",
        "class T5AbstractsTrainer:\n",
        "    def __init__(self, tokenizer, model, device, optimizer):\n",
        "        self.tokenizer = tokenizer\n",
        "        self.model = model\n",
        "        self.device = device\n",
        "        self.optimizer = optimizer\n",
        "\n",
        "\n",
        "\n",
        "    def forward(self, ids, mask, y_ids, labels):\n",
        "      outputs = self.model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "      loss, logits = outputs.loss, outputs.logits\n",
        "\n",
        "      # return loss, tanh_output\n",
        "      return loss, logits\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "    def train(self, epoch, loader):\n",
        "        self.model.train()\n",
        "\n",
        "        for _, data in enumerate(loader, 0):\n",
        "            y = data['target_ids'].to(self.device, dtype=torch.long)\n",
        "            y_ids = y[:, :-1].contiguous()\n",
        "            labels = y[:, 1:].clone().detach()\n",
        "            labels[y[:, 1:] == self.tokenizer.pad_token_id] = -100\n",
        "            ids = data['source_ids'].to(self.device, dtype=torch.long)\n",
        "            mask = data['source_mask'].to(self.device, dtype=torch.long)\n",
        "\n",
        "            outputs = self.model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "            loss = outputs[0]\n",
        "\n",
        "            if _ % 100 == 0:\n",
        "                print(\"RUN:\", _)\n",
        "                timestamp = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d %H:%M:%S %Z%z')\n",
        "                print(f\"\\t[{timestamp}]\")\n",
        "                print(f'Epoch: {epoch}, Batch Size: {ids.size(0)}, Loss: {loss.item()}')\n",
        "\n",
        "            self.optimizer.zero_grad()\n",
        "            loss.backward()\n",
        "            self.optimizer.step()\n"
      ],
      "metadata": {
        "id": "EwBT7E1u7glD"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# removing additional linear layer and tanh activation function\n",
        "# idea at this point is to finetune the t5 model on the data and then create another model that calls the pretrained t5 and then uses the outputs of the pretrained t5 to train additional layers on top of that\n",
        "\n",
        "class YuriTheTrainer:\n",
        "    def __init__(self, tokenizer, t5_model, device, optimizer):\n",
        "        self.tokenizer = tokenizer\n",
        "        self.t5_model = t5_model\n",
        "        self.device = device\n",
        "        self.optimizer = optimizer\n",
        "        self.criterion = nn.CrossEntropyLoss()\n",
        "\n",
        "        # additional layer parameters\n",
        "        self.MIN_LENGTH = 100\n",
        "        self.MAX_LENGTH = 300\n",
        "\n",
        "        # T5 Embedding has shape (32128, 768)\n",
        "        self.i_haf_to_travel_to_asia = nn.Sequential(nn.Conv1d(32128, 32128, 4, bias=True),\n",
        "                                                     nn.Dropout(0.1),\n",
        "                                                     nn.Tanh())\n",
        "\n",
        "\n",
        "\n",
        "    def forward(self, ids, mask, y_ids, labels):\n",
        "      outputs = self.t5_model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "      loss, logits = outputs.loss, outputs.logits\n",
        "\n",
        "      # return loss, tanh_output\n",
        "      return loss, logits\n",
        "\n",
        "\n",
        "    def train(self, epoch, loader):\n",
        "        self.model.train()\n",
        "\n",
        "        for _, data in enumerate(loader, 0):\n",
        "          # Get target data from dataloader and send to gpu as a torch datatype\n",
        "          y = data['target_ids'].to(self.device, dtype=torch.long)\n",
        "\n",
        "          # reformat target data ids\n",
        "          y_ids = y[:, :-1].contiguous()\n",
        "\n",
        "          # define and reformat labels and add missing token id\n",
        "          labels = y[:, 1:].clone().detach()\n",
        "          labels[y[:, 1:] == self.tokenizer.pad_token_id] = -100\n",
        "\n",
        "          # Get source data ids and mask from dataloader and send to gpu as a torch datatype\n",
        "          ids = data['source_ids'].to(self.device, dtype=torch.long)\n",
        "          mask = data['source_mask'].to(self.device, dtype=torch.long)\n",
        "\n",
        "          # Get outputs from model\n",
        "          # outputs = self.t5_model(input_ids=ids, attention_mask=mask, decoder_input_ids=y_ids, labels=labels)\n",
        "          t5_generate = model.generate(input_ids=ids.expand(1,-1), min_length=self.MIN_LENGTH, max_length=self.MAX_LENGTH)\n",
        "          loss = outputs[0]\n",
        "\n",
        "          loss = self.criterion(y_pred, y)\n",
        "\n",
        "          if _ % 100 == 0:\n",
        "              print(\"RUN:\", _)\n",
        "              timestamp = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d %H:%M:%S %Z%z')\n",
        "              print(f\"\\t[{timestamp}]\")\n",
        "              print(f'Epoch: {epoch}, Batch Size: {ids.size(0)}, Loss: {loss.item()}')\n",
        "\n",
        "          self.optimizer.zero_grad()\n",
        "          loss.backward()\n",
        "          self.optimizer.step()\n"
      ],
      "metadata": {
        "id": "CmIx7-GXdjzn"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Train T5AbtractsTrainer"
      ],
      "metadata": {
        "id": "Cx1D9j2Z7zvI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Set random seeds and deterministic pytorch for reproducibility\n",
        "#torch.manual_seed(42) # pytorch random seed\n",
        "np.random.seed(42) # numpy random seed\n",
        "torch.backends.cudnn.deterministic = True\n",
        "\n",
        "# tokenzier for encoding the text\n",
        "tokenizer = T5Tokenizer.from_pretrained(T5_MODEL)\n",
        "\n",
        "# tokenization hyperparameter definitions\n",
        "MAX_LEN = 700\n",
        "SUMMARY_LEN = 150\n",
        "\n",
        "# Creating the Training and Validation dataset for further creation of Dataloader\n",
        "training_set = CustomDataset(df_train, tokenizer, MAX_LEN, SUMMARY_LEN)\n",
        "val_set = CustomDataset(df_val, tokenizer, MAX_LEN, SUMMARY_LEN)\n",
        "\n",
        "# Defining the parameters for creation of dataloaders\n",
        "BATCHSIZE = 7\n",
        "train_params = {\n",
        "    'batch_size': BATCHSIZE,\n",
        "    'shuffle': True,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "val_params = {\n",
        "    'batch_size': BATCHSIZE,\n",
        "    'shuffle': False,\n",
        "    'num_workers': 0\n",
        "    }\n",
        "\n",
        "\n",
        "# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.\n",
        "training_loader = DataLoader(training_set, **train_params)\n",
        "val_loader = DataLoader(val_set, **val_params)\n",
        "\n",
        "# define configuration for peft (source: https://www.philschmid.de/fine-tune-flan-t5-peft, https://huggingface.co/blog/peft)\n",
        "lora_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM,\n",
        "                               inference_mode=False,\n",
        "                               r=8,\n",
        "                               lora_alpha=32,\n",
        "                               lora_dropout=0.1)\n",
        "\n",
        "# base T5 model from hugging face\n",
        "t5_model = T5ForConditionalGeneration.from_pretrained(T5_MODEL,output_hidden_states=True,return_dict=True)\n",
        "\n",
        "# Defining the  PEFT model. We are using t5-base model and added a Language model layer on top for generation of Summary.\n",
        "# Further this model is sent to device (GPU/TPU) for using the hardware.\n",
        "model = get_peft_model(t5_model, lora_config)\n",
        "model.to(device)\n",
        "\n",
        "# take a peek at the peft model for comparison to non-peft\n",
        "print(f\"Preview of trainable parameters in the peft model\\n\")\n",
        "model.print_trainable_parameters()\n",
        "\n",
        "# Defining the optimizer that will be used to tune the weights of the network in the training session.\n",
        "optimizer = torch.optim.Adam(params = model.parameters(), lr=3e-4) # 1e-4 ended with loss Loss:  2.074270486831665, now trying e-5 -- got 2.3540332317352295 ---> so far best lr seems to be 1e-4. 3e-4 got sub 2 loss, so pretty decent\n",
        "\n",
        "#summary(model, input_size=[(batch_size, input_length)])\n",
        "#summary(model, input_size=[(8, 6)])\n",
        "# print (model)\n",
        "\n",
        "# train the model for 2 epochs\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "#for epoch in range(EPOCHS):\n",
        "#        train(epoch, tokenizer, model, device, training_loader, optimizer)\n",
        "\n",
        "\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 282,
          "referenced_widgets": [
            "b87c718ffa014b698f5302ac4b85cfbc",
            "cb97a7229b5b4f459667c44142048584",
            "2366c60a468a4bdaa4c20f9b0d379e61",
            "a8c724b552f84e999ff1e3fe88eb0f1d",
            "cf5e4a8ccf0343d0be2d081ba55b23ba",
            "7d52639c06804f758bcc380a5adb39fc",
            "360ea7f698c14b3484830110aaff859d",
            "8d3208bae8f043d99be8298afb4016af",
            "ef2b8fbfb131431484a3c95f3d13de67",
            "f4ade4961f154ee2934160a530806093",
            "b8f34d5bd4f54428bbd0a15bfc7ca617",
            "766a501c262b4ad88b705c78e01da38b",
            "9e90f5eda3454b83a4fb3ceac3dca8f7",
            "6f24fb85ada64b52acc31ad0295ef427",
            "ad9ac6877f2043e59fb0916928433b90",
            "3099f67fc7134681b93ffa6d4453fd9e",
            "cbd5aaff0e0540b0bc127a88369b4852",
            "989eeccc4a7148fcbb17ff753ce1bb82",
            "c651cbf7cc1143e3b91621646c2d9e4c",
            "a6140294e36a40a2bcd0ff4de8452d66",
            "9dd6162cab974aa0b3c3ce0480a507e8",
            "9b9b8b2cf24341aa8a93c28f48f7026f",
            "470127267c5f494990a1b1045d14ea78",
            "4b58e9b647754187a1da8e4aa76fba16",
            "ba5f5aa83c314e7a855a52f933fd036d",
            "8dfd49369fb04e1fa4eba898d32ef9e0",
            "b689524c5fb649189544fe711b52dfad",
            "e4b1c27fad4e439eafb92c8cdaa6805d",
            "2f9a191d5de64bd0a66c9b1bb3056cff",
            "2b67c83f83f841cc868698e4de992a5e",
            "0c210297cc234a8f95f13ab5d5a27017",
            "7b7acc7c41b24c4da4bd010ce3f26672",
            "2115a85109ab46e0bd6f22933a09c2c9",
            "934d2bd4c7f14719897a632189585bf2",
            "322b61fabafd46289cd92dc0d064b50a",
            "a6e709590ed74314b91990c36f1c5f03",
            "f052fe4d2c3644f294512dbeddff515d",
            "05077183f5184805b05f32175e30c645",
            "dfe84194bc90499d97b444b7f57b1091",
            "27b04743f9ca4bc3be5cc693252a63ba",
            "e82307c2afe04518ba58109eaf460b89",
            "63512e102325459cbf8659efb7e75fd4",
            "75b5501fcd4e4e809eb1a9cb21fc0a0c",
            "3945b5e0289542e7b330d5a81e93623b",
            "b6aa0aa60397484fa7a4824859a3badd",
            "c89d0d82fc9440bd996a27f8ae8e5411",
            "abfae4595dd64f23bf73aa765c3b10f8",
            "e23fe9ac4f6c4115aa976a0925a1b4b9",
            "c1b8e20ddbdd4e189b93d2c0e66ac7ba",
            "b4d6e7a2455947e19a9ce0914dfdba9c",
            "ad8d3584468f4dfd9e2269f9fafc52e4",
            "30c1d242f2af438995d7bb175d80e52f",
            "77c83e27bde54a679fc785d86dbc6a0c",
            "75e56ac1e91a4d50ad4f33f9e74d413a",
            "d6725d7e91d54658a9a495b071339bd5"
          ]
        },
        "id": "SulCsatV7yjM",
        "outputId": "bc6fd3d0-8009-4133-fef7-4a023aa9489e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b87c718ffa014b698f5302ac4b85cfbc"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "766a501c262b4ad88b705c78e01da38b"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "470127267c5f494990a1b1045d14ea78"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "934d2bd4c7f14719897a632189585bf2"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b6aa0aa60397484fa7a4824859a3badd"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Preview of trainable parameters in the peft model\n",
            "\n",
            "trainable params: 884,736 || all params: 223,788,288 || trainable%: 0.3953450861557152\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# take a peaky poo at the model to verify layer sizes and such\n",
        "t5trainer = T5AbstractsTrainer(tokenizer, model, device, optimizer)\n",
        "\n",
        "out = t5_model(input_ids=training_set[0]['source_ids'].expand(1,-1), decoder_input_ids=training_set[0]['source_ids'].expand(1,-1))"
      ],
      "metadata": {
        "id": "0rOISEuI2J7f"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "gen_out = model.generate(input_ids=training_set[0]['source_ids'].expand(1,-1), min_length=100, max_length=300)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "g1C9WuYs61Ip",
        "outputId": "c8926499-de10-4ea1-ae57-860a10dd07a2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2645: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "EPOCHS = 1\n",
        "trainer = T5AbstractsTrainer(tokenizer, model, device, optimizer)\n",
        "for epoch in range(EPOCHS):\n",
        "    trainer.train(epoch, training_loader)"
      ],
      "metadata": {
        "id": "YewmqZvX2JbW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Train YuriTheTrainer with fine tuned T5AbtractsTrainer"
      ],
      "metadata": {
        "id": "Z9VU_kgI1aYy"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "EPOCHS = 1\n",
        "yuri = YuriTheTrainer(tokenizer, model=trainer, device, optimizer)\n",
        "for epoch in range(EPOCHS):\n",
        "    yuri.train(epoch, training_loader)"
      ],
      "metadata": {
        "id": "d6U5f5c11aKj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Straight Outta SF"
      ],
      "metadata": {
        "id": "9GbEb8KSKGmW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "class Conv1DLayer(nn.Module):\n",
        "    def __init__(self, input_size, output_size, kernel_size):\n",
        "        super(Conv1DLayer, self).__init__()\n",
        "        self.conv1d = nn.Conv1d(input_size, output_size, kernel_size)\n",
        "        self.relu = nn.ReLU()\n",
        "\n",
        "    def forward(self, x):\n",
        "        # x: [batch_size, sequence_length, input_size]\n",
        "        x = x.permute(0, 2, 1)  # Conv1d expects input in the format: [batch_size, input_size, sequence_length]\n",
        "        x = self.conv1d(x)\n",
        "        x = self.relu(x)\n",
        "        return x\n",
        "\n",
        "class T5WithConv1D(nn.Module):\n",
        "    def __init__(self, conv_config):\n",
        "        super(T5WithConv1D, self).__init__()\n",
        "\n",
        "        self.t5_model = T5Model.from_config(T5_MODEL)\n",
        "\n",
        "        self.conv1d_layer = Conv1DLayer(conv_config['input_size'],\n",
        "                                        conv_config['output_size'],\n",
        "                                        conv_config['kernel_size'])\n",
        "\n",
        "    def forward(self, input_ids, attention_mask):\n",
        "        # T5 forward pass\n",
        "        t5_outputs = self.t5_model(input_ids=input_ids,\n",
        "                                   attention_mask=attention_mask,\n",
        "                                   output_hidden_states=True,\n",
        "                                   return_dict=True)\n",
        "\n",
        "        last_hidden_state = t5_outputs.last_hidden_state  # shape: [batch_size, sequence_length, hidden_size]\n",
        "\n",
        "        # Apply Conv1D layer\n",
        "        conv_output = self.conv1d_layer(last_hidden_state)\n",
        "\n",
        "        return conv_output\n",
        "\n",
        "conv_config = {\n",
        "    'input_size': t5_model_config['d_model'],\n",
        "    'output_size': t5_model_config['d_model'],\n",
        "    'kernel_size': 4\n",
        "}\n",
        "\n",
        "t5_with_conv1d = T5WithConv1D(conv_config)\n",
        "\n",
        "# Example input\n",
        "input_ids = torch.randint(0, 100, (2, 10))  # Batch size of 2, sequence length of 10\n",
        "attention_mask = torch.ones_like(input_ids)\n",
        "\n",
        "# Forward pass\n",
        "output = t5_with_conv1d(input_ids, attention_mask)\n",
        "print(output.shape)  # Example output shape: torch.Size([2, 128, 8])\n"
      ],
      "metadata": {
        "id": "5nDSljK_KEDU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers.optimization import AdamW\n",
        "from tqdm import tqdm\n",
        "\n",
        "# Instantiate T5 model and Conv1D layer\n",
        "t5_model_config = T5Config.from_pretrained(T5_MODEL)\n",
        "conv_config = {'input_size': t5_model_config.d_model, 'output_size': t5_model_config.d_model, 'kernel_size': 4}\n",
        "t5_with_conv1d = T5WithConv1D(t5_model_config, conv_config)\n",
        "\n",
        "# Define your loss function and optimizer\n",
        "criterion = nn.CrossEntropyLoss()\n",
        "optimizer = AdamW(t5_with_conv1d.parameters(), lr=3e-5)\n",
        "\n",
        "# Training loop\n",
        "epochs = 2\n",
        "t5_with_conv1d.to(device)\n",
        "t5_with_conv1d.train()\n",
        "\n",
        "for epoch in range(epochs):\n",
        "    total_loss = 0\n",
        "    for batch in tqdm(training_loader, desc=f'Epoch {epoch + 1}/{epochs}'):\n",
        "        input_ids = batch['input_ids'].to(device)\n",
        "        attention_mask = batch['attention_mask'].to(device)\n",
        "        labels = batch['labels'].to(device)\n",
        "\n",
        "        optimizer.zero_grad()\n",
        "\n",
        "        outputs = t5_with_conv1d(input_ids, attention_mask)\n",
        "        logits = outputs.view(-1, outputs.size(-1))\n",
        "\n",
        "        loss = criterion(logits, labels.view(-1))\n",
        "        total_loss += loss.item()\n",
        "\n",
        "        loss.backward()\n",
        "        optimizer.step()\n",
        "\n",
        "    avg_train_loss = total_loss / len(training_loader)\n",
        "    print(f'Epoch {epoch + 1}/{epochs}, Average Training Loss: {avg_train_loss}')\n",
        "\n",
        "# Save the trained model\n",
        "# torch.save(t5_with_conv1d.state_dict(), 't5_with_conv1d_model.pth')\n"
      ],
      "metadata": {
        "id": "poqjGktOKQJP"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}