diff --git "a/prediction_sinhala.ipynb" "b/prediction_sinhala.ipynb"
new file mode 100644--- /dev/null
+++ "b/prediction_sinhala.ipynb"
@@ -0,0 +1,3865 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "1edd00396f2d45a7b32079d43bc62634": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_a96994a464df43918566f6cc967e7148",
+ "IPY_MODEL_92143f1854c44349a3d0f6b7838b6a5c",
+ "IPY_MODEL_b90793e5e29c435cab6fd7b1e059c992"
+ ],
+ "layout": "IPY_MODEL_8ce96d68c1e443b28e1200f106fefb02"
+ }
+ },
+ "a96994a464df43918566f6cc967e7148": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dcfa2646664e449c98a00e89b2b7984d",
+ "placeholder": "",
+ "style": "IPY_MODEL_28bcfe01e8a64ba08ce62e9715ad85e4",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "92143f1854c44349a3d0f6b7838b6a5c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1b72e7f7e85a49fb8c7a79bce1989647",
+ "max": 1375,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_76523fc98b644aaaaf6c605544e9fffb",
+ "value": 1375
+ }
+ },
+ "b90793e5e29c435cab6fd7b1e059c992": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b2d2a9eabbe14cddaa7d0aa39e7a1953",
+ "placeholder": "",
+ "style": "IPY_MODEL_bf4db198f72441b48a5dbff8515a1f91",
+ "value": " 1.38k/1.38k [00:00<00:00, 36.2kB/s]"
+ }
+ },
+ "8ce96d68c1e443b28e1200f106fefb02": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dcfa2646664e449c98a00e89b2b7984d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "28bcfe01e8a64ba08ce62e9715ad85e4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1b72e7f7e85a49fb8c7a79bce1989647": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "76523fc98b644aaaaf6c605544e9fffb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b2d2a9eabbe14cddaa7d0aa39e7a1953": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "bf4db198f72441b48a5dbff8515a1f91": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "48e8e488c27a4948a455835f6caf2ce2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_2fb5a8ac30ed49df93056bc6802e8ee0",
+ "IPY_MODEL_c29fcea40de347bf9f274f375b9123a5",
+ "IPY_MODEL_00cd02215e1f4225a4cf93b46b9a7e15"
+ ],
+ "layout": "IPY_MODEL_e1064a6f8bfd4435a6ad15d08ff44699"
+ }
+ },
+ "2fb5a8ac30ed49df93056bc6802e8ee0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_956884e6e8bc43f4bc51a2b75c131889",
+ "placeholder": "",
+ "style": "IPY_MODEL_4e23a4eec35f4f8ea3114d9cb0ea1e04",
+ "value": "vocab.json: 100%"
+ }
+ },
+ "c29fcea40de347bf9f274f375b9123a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e7a3964adec34bf6b37f52cf1119fa9c",
+ "max": 1500217,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_22a128f583aa4514a1e71d0f8aaf8e79",
+ "value": 1500217
+ }
+ },
+ "00cd02215e1f4225a4cf93b46b9a7e15": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3f902b8cb652446c84609cd730a64e35",
+ "placeholder": "",
+ "style": "IPY_MODEL_a78b7664a1e346f181b203bb1645eb9b",
+ "value": " 1.50M/1.50M [00:00<00:00, 9.80MB/s]"
+ }
+ },
+ "e1064a6f8bfd4435a6ad15d08ff44699": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "956884e6e8bc43f4bc51a2b75c131889": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4e23a4eec35f4f8ea3114d9cb0ea1e04": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e7a3964adec34bf6b37f52cf1119fa9c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "22a128f583aa4514a1e71d0f8aaf8e79": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "3f902b8cb652446c84609cd730a64e35": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a78b7664a1e346f181b203bb1645eb9b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3bf7edeed06a4ef3b1ce28f24201c84b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_205cbd07f34345c48b1e72bc2cb9a93b",
+ "IPY_MODEL_a2d328e2313a49aab752cd2ba38220b5",
+ "IPY_MODEL_a3696eb89c4e434683bb5416d91602db"
+ ],
+ "layout": "IPY_MODEL_e04dfc9c2e5f437c8dd9b15f33c04a4a"
+ }
+ },
+ "205cbd07f34345c48b1e72bc2cb9a93b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1f5b368654494327bfc8d1c315f13832",
+ "placeholder": "",
+ "style": "IPY_MODEL_853eb13a56ec4fbf89e25f333798132e",
+ "value": "merges.txt: 100%"
+ }
+ },
+ "a2d328e2313a49aab752cd2ba38220b5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5713250ff3864029a3668c6a7eb1f3e3",
+ "max": 1146413,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_944ed217567144459ea5279c34f529f3",
+ "value": 1146413
+ }
+ },
+ "a3696eb89c4e434683bb5416d91602db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1e0faaa1a09f42f8964a3203472f50c2",
+ "placeholder": "",
+ "style": "IPY_MODEL_7c3da939876e4a6f8f2969fbf96bbcd0",
+ "value": " 1.15M/1.15M [00:00<00:00, 14.1MB/s]"
+ }
+ },
+ "e04dfc9c2e5f437c8dd9b15f33c04a4a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1f5b368654494327bfc8d1c315f13832": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "853eb13a56ec4fbf89e25f333798132e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5713250ff3864029a3668c6a7eb1f3e3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "944ed217567144459ea5279c34f529f3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "1e0faaa1a09f42f8964a3203472f50c2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7c3da939876e4a6f8f2969fbf96bbcd0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "aacafd29b7b5403bb8a7df1ebe2a731e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ec944b4365c34ab6813af9d925e2a552",
+ "IPY_MODEL_8d48d360da5945bfbf300ae455043c07",
+ "IPY_MODEL_36f7f5ae075f4c59a44283e25088eaab"
+ ],
+ "layout": "IPY_MODEL_8cfa54ddcf354e0e9f71102656a744cf"
+ }
+ },
+ "ec944b4365c34ab6813af9d925e2a552": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edcb22216cac4bcf83a301d975f20d2d",
+ "placeholder": "",
+ "style": "IPY_MODEL_3ace28614fe446f18268578e56b5ec14",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "8d48d360da5945bfbf300ae455043c07": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3c8f3c862f744ddba9524079c636124a",
+ "max": 3529879,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_36eb36dba13d4ad4a73b401e0dc22c42",
+ "value": 3529879
+ }
+ },
+ "36f7f5ae075f4c59a44283e25088eaab": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_236bb6124df1443684b16dd34fba2ed4",
+ "placeholder": "",
+ "style": "IPY_MODEL_3264137a43cc4725948aab030421b24c",
+ "value": " 3.53M/3.53M [00:00<00:00, 22.8MB/s]"
+ }
+ },
+ "8cfa54ddcf354e0e9f71102656a744cf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "edcb22216cac4bcf83a301d975f20d2d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3ace28614fe446f18268578e56b5ec14": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3c8f3c862f744ddba9524079c636124a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "36eb36dba13d4ad4a73b401e0dc22c42": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "236bb6124df1443684b16dd34fba2ed4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3264137a43cc4725948aab030421b24c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "dd76625672d74095a0f691206646fbd8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_a10c0e99afb546d79fca304e8a8e6ab1",
+ "IPY_MODEL_6fa7911781ef42949e56c80dc1f85299",
+ "IPY_MODEL_832e08b2b4524680bca9d71c363f3232"
+ ],
+ "layout": "IPY_MODEL_10ec65b4df86458ba2eee2eeccdc91e6"
+ }
+ },
+ "a10c0e99afb546d79fca304e8a8e6ab1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_764921df2d7b438b8a8ad0d7b68b8b6e",
+ "placeholder": "",
+ "style": "IPY_MODEL_12f7b959b26b472099ea4e06b606772a",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6fa7911781ef42949e56c80dc1f85299": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_32c6cb10b1e946a89a7b08505c1582e9",
+ "max": 957,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3ee11622c9a0405abf5f246720d358b2",
+ "value": 957
+ }
+ },
+ "832e08b2b4524680bca9d71c363f3232": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d78ced95d2134e299573c1e8d712b3f8",
+ "placeholder": "",
+ "style": "IPY_MODEL_6ff7b4b691a74f6ab232c26ee55b9982",
+ "value": " 957/957 [00:00<00:00, 43.5kB/s]"
+ }
+ },
+ "10ec65b4df86458ba2eee2eeccdc91e6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "764921df2d7b438b8a8ad0d7b68b8b6e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "12f7b959b26b472099ea4e06b606772a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "32c6cb10b1e946a89a7b08505c1582e9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3ee11622c9a0405abf5f246720d358b2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d78ced95d2134e299573c1e8d712b3f8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6ff7b4b691a74f6ab232c26ee55b9982": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2cc542103450405a853945ff07471932": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5807fb12507f4fc0aaa4d083477aba27",
+ "IPY_MODEL_11d2ce4ace194bb1825fefacf3cc36f0",
+ "IPY_MODEL_b22724e628384993aa52c255c8bffc6f"
+ ],
+ "layout": "IPY_MODEL_e4dc10b2ef9c4a1f99689ebe9d48886a"
+ }
+ },
+ "5807fb12507f4fc0aaa4d083477aba27": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6b580a221e2f48b5b058c6a6189cc99d",
+ "placeholder": "",
+ "style": "IPY_MODEL_e5f30b22e01c4b74a2f20f3b9880d7ae",
+ "value": "config.json: 100%"
+ }
+ },
+ "11d2ce4ace194bb1825fefacf3cc36f0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e353ffe187d94729a65453fcd3d8a9a3",
+ "max": 696,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b056d5ef8b9244828e10f685e38d47bb",
+ "value": 696
+ }
+ },
+ "b22724e628384993aa52c255c8bffc6f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_32ce45ecf0d64c81bea12692ad52ed45",
+ "placeholder": "",
+ "style": "IPY_MODEL_b930858da1a2407ba49a8a8a17a1fc70",
+ "value": " 696/696 [00:00<00:00, 35.8kB/s]"
+ }
+ },
+ "e4dc10b2ef9c4a1f99689ebe9d48886a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6b580a221e2f48b5b058c6a6189cc99d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e5f30b22e01c4b74a2f20f3b9880d7ae": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e353ffe187d94729a65453fcd3d8a9a3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b056d5ef8b9244828e10f685e38d47bb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "32ce45ecf0d64c81bea12692ad52ed45": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b930858da1a2407ba49a8a8a17a1fc70": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d366271fc98943aa8fe3da314c1e95dc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_364ab880ac304099b1bb83a92d6a7eed",
+ "IPY_MODEL_4a8d5b1ca09d493c8b93baf92e7fd5ae",
+ "IPY_MODEL_c6867a46aa064e26831be8a3a4278905"
+ ],
+ "layout": "IPY_MODEL_053fc43d2aad4aeeb114b8fd9aa2aef6"
+ }
+ },
+ "364ab880ac304099b1bb83a92d6a7eed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a9583dca84a14e9bb89e58a430c391e3",
+ "placeholder": "",
+ "style": "IPY_MODEL_15b6e4ff685f4f5e8c997e151d2b4007",
+ "value": "pytorch_model.bin: 100%"
+ }
+ },
+ "4a8d5b1ca09d493c8b93baf92e7fd5ae": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d73e215040114141bc60b0f58a1f8646",
+ "max": 506353257,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_18ab1cf13c494b02af122ee0069e3c92",
+ "value": 506353257
+ }
+ },
+ "c6867a46aa064e26831be8a3a4278905": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_839cabb3e69c46549665769afcc24e08",
+ "placeholder": "",
+ "style": "IPY_MODEL_56fe41cf83454378a9345ee4e9a26192",
+ "value": " 506M/506M [00:09<00:00, 57.3MB/s]"
+ }
+ },
+ "053fc43d2aad4aeeb114b8fd9aa2aef6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a9583dca84a14e9bb89e58a430c391e3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "15b6e4ff685f4f5e8c997e151d2b4007": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d73e215040114141bc60b0f58a1f8646": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "18ab1cf13c494b02af122ee0069e3c92": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "839cabb3e69c46549665769afcc24e08": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "56fe41cf83454378a9345ee4e9a26192": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "EWlgdx43A_NL",
+ "outputId": "a2431529-0c26-4076-a6cf-4c9146c4f9b0"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mounted at /content/drive\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! pip install faknow\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "7dssDha5BGNL",
+ "outputId": "5d303d54-6c32-482d-83e7-c46a4506cbce"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting faknow\n",
+ " Downloading faknow-0.0.3-py3-none-any.whl (147 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.6/147.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n",
+ "Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n",
+ "Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n",
+ "Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n",
+ "Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n",
+ "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n",
+ "Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n",
+ "Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n",
+ "Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n",
+ "Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n",
+ "Collecting sphinx-markdown-tables>=0.0.17 (from faknow)\n",
+ " Downloading sphinx_markdown_tables-0.0.17-py3-none-any.whl (28 kB)\n",
+ "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (1.11.4)\n",
+ "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n",
+ "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n",
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n",
+ "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.3.0)\n",
+ "Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.5.2)\n",
+ "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n",
+ "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.62.0)\n",
+ "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.27.0)\n",
+ "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n",
+ "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.20.3)\n",
+ "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.31.0)\n",
+ "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (67.7.2)\n",
+ "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.16.0)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n",
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (3.13.1)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.20.3)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (23.2)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (6.0.1)\n",
+ "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.15.2)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n",
+ "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (5.3.3)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.3.0)\n",
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (4.9)\n",
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (1.3.1)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (2023.6.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (4.10.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.6)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2024.2.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n",
+ "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.5.1)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (3.2.2)\n",
+ "Installing collected packages: sphinx-markdown-tables, faknow\n",
+ "Successfully installed faknow-0.0.3 sphinx-markdown-tables-0.0.17\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "Zo3_tdxod_tn"
+ },
+ "outputs": [],
+ "source": [
+ "from typing import List, Optional, Tuple\n",
+ "\n",
+ "import torch\n",
+ "from torch import Tensor\n",
+ "from torch import nn\n",
+ "from transformers import RobertaModel\n",
+ "\n",
+ "from faknow.model.layers.layer import TextCNNLayer\n",
+ "from faknow.model.model import AbstractModel\n",
+ "import pandas as pd\n",
+ "\n",
+ "\n",
+ "class _MLP(nn.Module):\n",
+ " def __init__(self,\n",
+ " input_dim: int,\n",
+ " embed_dims: List[int],\n",
+ " dropout_rate: float,\n",
+ " output_layer=True):\n",
+ " super().__init__()\n",
+ " layers = list()\n",
+ " for embed_dim in embed_dims:\n",
+ " layers.append(nn.Linear(input_dim, embed_dim))\n",
+ " layers.append(nn.BatchNorm1d(embed_dim))\n",
+ " layers.append(nn.ReLU())\n",
+ " layers.append(nn.Dropout(p=dropout_rate))\n",
+ " input_dim = embed_dim\n",
+ " if output_layer:\n",
+ " layers.append(torch.nn.Linear(input_dim, 1))\n",
+ " self.mlp = torch.nn.Sequential(*layers)\n",
+ "\n",
+ " def forward(self, x: Tensor) -> Tensor:\n",
+ " \"\"\"\n",
+ "\n",
+ " Args:\n",
+ " x (Tensor): shared feature from domain and text, shape=(batch_size, input_dim)\n",
+ "\n",
+ " \"\"\"\n",
+ " return self.mlp(x)\n",
+ "\n",
+ "\n",
+ "class _MaskAttentionLayer(torch.nn.Module):\n",
+ " \"\"\"\n",
+ " Attention pooling: softmax-weighted sum of the inputs along dim 1, ignoring positions where mask == 0\n",
+ " \"\"\"\n",
+ " def __init__(self, input_size: int):\n",
+ " super(_MaskAttentionLayer, self).__init__()\n",
+ " self.attention_layer = torch.nn.Linear(input_size, 1)\n",
+ "\n",
+ " def forward(self,\n",
+ " inputs: Tensor,\n",
+ " mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:\n",
+ " weights = self.attention_layer(inputs).view(-1, inputs.size(1))\n",
+ " if mask is not None:\n",
+ " weights = weights.masked_fill(mask == 0, float(\"-inf\"))\n",
+ " weights = torch.softmax(weights, dim=-1).unsqueeze(1)\n",
+ " outputs = torch.matmul(weights, inputs).squeeze(1)\n",
+ " return outputs, weights\n",
+ "\n",
+ "\n",
+ "class MDFEND(AbstractModel):\n",
+ " r\"\"\"\n",
+ " MDFEND: Multi-domain Fake News Detection, CIKM 2021\n",
+ " paper: https://dl.acm.org/doi/10.1145/3459637.3482139\n",
+ " code: https://github.com/kennqiang/MDFEND-Weibo21\n",
+ " \"\"\"\n",
+ " def __init__(self,\n",
+ " pre_trained_bert_name: str,\n",
+ " domain_num: int,\n",
+ " mlp_dims: Optional[List[int]] = None,\n",
+ " dropout_rate=0.2,\n",
+ " expert_num=5):\n",
+ " \"\"\"\n",
+ "\n",
+ " Args:\n",
+ " pre_trained_bert_name (str): the name or local path of the pre-trained model, loaded via RobertaModel.from_pretrained\n",
+ " domain_num (int): total number of all domains\n",
+ " mlp_dims (Optional[List[int]]): a list of the dimensions in MLP layer; if None, [384] is used, default=None\n",
+ " dropout_rate (float): rate of Dropout layer, default=0.2\n",
+ " expert_num (int): number of experts, each of which is a TextCNNLayer, default=5\n",
+ " \"\"\"\n",
+ " super(MDFEND, self).__init__()\n",
+ " self.domain_num = domain_num\n",
+ " self.expert_num = expert_num\n",
+ " self.bert = RobertaModel.from_pretrained(\n",
+ " pre_trained_bert_name).requires_grad_(False)\n",
+ " self.embedding_size = self.bert.config.hidden_size\n",
+ " self.loss_func = nn.BCELoss()\n",
+ " if mlp_dims is None:\n",
+ " mlp_dims = [384]\n",
+ "\n",
+ " filter_num = 64\n",
+ " filter_sizes = [1, 2, 3, 5, 10]\n",
+ " experts = [\n",
+ " TextCNNLayer(self.embedding_size, filter_num, filter_sizes)\n",
+ " for _ in range(self.expert_num)\n",
+ " ]\n",
+ " self.experts = nn.ModuleList(experts)\n",
+ "\n",
+ " self.gate = nn.Sequential(\n",
+ " nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),\n",
+ " nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))\n",
+ "\n",
+ " self.attention = _MaskAttentionLayer(self.embedding_size)\n",
+ "\n",
+ " self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,\n",
+ " embedding_dim=self.embedding_size)\n",
+ " self.classifier = _MLP(320, mlp_dims, dropout_rate)\n",
+ "\n",
+ " def forward(self, token_id: Tensor, mask: Tensor,\n",
+ " domain: Tensor) -> Tensor:\n",
+ " \"\"\"\n",
+ "\n",
+ " Args:\n",
+ " token_id (Tensor): token ids from bert tokenizer, shape=(batch_size, max_len)\n",
+ " mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)\n",
+ " domain (Tensor): domain id, shape=(batch_size,)\n",
+ "\n",
+ " Returns:\n",
+ " FloatTensor: the prediction of being fake, shape=(batch_size,)\n",
+ " \"\"\"\n",
+ " text_embedding = self.bert(token_id,\n",
+ " attention_mask=mask).last_hidden_state\n",
+ " attention_feature, _ = self.attention(text_embedding, mask)\n",
+ "\n",
+ " domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)\n",
+ "\n",
+ " gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)\n",
+ " gate_output = self.gate(gate_input)\n",
+ "\n",
+ " shared_feature = 0\n",
+ " for i in range(self.expert_num):\n",
+ " expert_feature = self.experts[i](text_embedding)\n",
+ " shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))\n",
+ "\n",
+ " label_pred = self.classifier(shared_feature)\n",
+ "\n",
+ " return torch.sigmoid(label_pred.squeeze(1))\n",
+ "\n",
+ " def calculate_loss(self, data) -> Tensor:\n",
+ " \"\"\"\n",
+ " calculate loss via BCELoss\n",
+ "\n",
+ " Args:\n",
+ " data (dict): batch data dict\n",
+ "\n",
+ " Returns:\n",
+ " loss (Tensor): loss value\n",
+ " \"\"\"\n",
+ "\n",
+ " token_ids = data['text']['token_id']\n",
+ " masks = data['text']['mask']\n",
+ " domains = data['domain']\n",
+ " labels = data['label']\n",
+ " output = self.forward(token_ids, masks, domains)\n",
+ " return self.loss_func(output, labels.float())\n",
+ "\n",
+ " def predict(self, data_without_label) -> Tensor:\n",
+ " \"\"\"\n",
+ " predict the probability of being fake news\n",
+ "\n",
+ " Args:\n",
+ " data_without_label (Dict[str, Any]): batch data dict\n",
+ "\n",
+ " Returns:\n",
+ " Tensor: probability of being fake, shape=(batch_size,)\n",
+ " \"\"\"\n",
+ "\n",
+ " token_ids = data_without_label['text']['token_id']\n",
+ " masks = data_without_label['text']['mask']\n",
+ " domains = data_without_label['domain']\n",
+ "\n",
+ "\n",
+ " output_prob = self.forward(token_ids, masks,domains)\n",
+ "\n",
+ " return output_prob"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from faknow.data.dataset.text import TextDataset\n",
+ "from faknow.data.process.text_process import TokenizerFromPreTrained\n",
+ "from faknow.evaluate.evaluator import Evaluator\n",
+ "\n",
+ "import torch\n",
+ "from torch.utils.data import DataLoader"
+ ],
+ "metadata": {
+ "id": "Tg2zBjzUBTbt"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "testing_path = \"/content/drive/MyDrive/sinhala-dataset/test_data.json\"\n"
+ ],
+ "metadata": {
+ "id": "Ls-xo82WBbUg"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df = pd.read_json(testing_path)\n",
+ "df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "by3bnTMCMh6K",
+ "outputId": "bdc10951-f15e-4918-b7cd-84dd5535b4e5"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " text domain label\n",
+ "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 1\n",
+ "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n",
+ "2 ඒකි ඒම නෑ බං # jaysays 0 0\n",
+ "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 1\n",
+ "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " text | \n",
+ " domain | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ඒකි ඒම නෑ බං # jaysays | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5000,\n \"samples\": [\n \"\\u0d87\\u0dba\\u0dd2 \\u0dba\\u0d9a\\u0ddd \\u0dbd\\u0d82\\u0d9a\\u0dcf\\u0dc0\\u0dda \\u0db6\\u0dd9\\u0dbb\\u0dd2 ! \\u0daf\\u0dd3\\u0db4\\u0dbd\\u0dca\\u0dbd\\u0dcf # \\u0dc0\\u0dd9\\u0dbb\\u0dd2 # \\u0d9c\\u0dbd\\u0dca\\u0db6\\u0ddd\\u0dad\\u0dbd\\u0dca # GenElecSL # SriLanka # TamilNadu # Election2015\",\n \"@USER @USER \\u0d9a\\u0dcf\\u0dbd\\u0d9a\\u0db1\\u0dca\\u0db1\\u0dd2 \\u0d95\\u0d9a\\u0dd4\\u0db1\\u0d9c\\u0dd9 \\u0db4\\u0dd4\\u0d9a \\u0db8\\u0dc4\\u0dbd\\u0dcf \\u0db6\\u0da9\\u0dba\\u0db1\\u0dca\\u0db1 \\u0db6\\u0dd9\\u0dc4\\u0dd9\\u0dad\\u0dca \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0d95\\u0db1\\u0dcf\",\n \"\\u0dc3\\u0dd3\\u0dbd\\u0dcf\\u0dc0\\u0dad\\u0dd4\\u0dbb \\u0db1\\u0dcf / \\u0dc3\\u0dd9\\u0db6\\u0dc5\\u0dd4 \\u0dad\\u0dd2\\u0daf\\u0dd9\\u0db1\\u0d9a\\u0dd4\\u0da7 \\u0db4\\u0dc4\\u0dbb\\u0daf\\u0dd3 \\u0dc3\\u0dd9\\u0db6\\u0dbd\\u0dd9\\u0d9a\\u0dca \\u0dc4\\u0dd2\\u0dbb\\u0d9a\\u0dbb\\u0dba\\u0dd2 \\u0db6\\u0dda\\u0dbb\\u0dcf\\u0d9c\\u0dad\\u0dca\\u0dad\\u0dd9 \\u0db1\\u0dd3\\u0dad\\u0dd2\\u0db8\\u0dba \\u0db4\\u0dd2\\u0dba\\u0dc0\\u0dbb \\u0db1\\u0ddc\\u0d9c\\u0db1\\u0dca\\u0db1 \\u0db4\\u0ddc\\u0dbb\\u0ddc\\u0db1\\u0dca\\u0daf\\u0dd4\\u0dc0 \\u0db8\\u0dad\\u0dba \\u0db1\\u0ddc\\u0daf\\u0d9a\\u0dd2\\u0db1\\u0dca URL via @USER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df =df[:100]"
+ ],
+ "metadata": {
+ "id": "LX0T74ZtM9j9"
+ },
+ "execution_count": 8,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df[\"label\"] = int(0)"
+ ],
+ "metadata": {
+ "id": "60iL_I8ONCts"
+ },
+ "execution_count": 9,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "SE4yeguZNIo-",
+ "outputId": "110eb559-0dd0-4f2e-cb1c-694100365a31"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " text domain label\n",
+ "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 0\n",
+ "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n",
+ "2 ඒකි ඒම නෑ බං # jaysays 0 0\n",
+ "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 0\n",
+ "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " text | \n",
+ " domain | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ඒකි ඒම නෑ බං # jaysays | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"\\u0d89\\u0dc3\\u0dca\\u0dc3\\u0dbb \\u0d8b\\u0db6 \\u0dc0\\u0dbd\\u0dd2\\u0dba\\u0d9a\\u0dca \\u0db1\\u0db8\\u0dca \\u0dbd\\u0ddc\\u0dc0\\u0dd9\\u0dad\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0ddc \\u0d9c\\u0dd2\\u0dc4\\u0dcf\\u0db1\\u0dca \\u0d85\\u0dba\\u0dd2\\u0dba\\u0dcf \\u0d89\\u0d9a\\u0dca\\u0db8\\u0db1\\u0da7 \\u0dc0\\u0dbb\\u0dd9\\u0db1\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0dbd\\u0dcf \\u0d85\\u0da9 \\u0d9c\\u0dc4\\u0dbd\\u0dcf \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0dca\\u0db1\\u0dda \\u0dad\\u0ddc \\u0daf\\u0dd0\\u0db1\\u0dca \\u0dc0\\u0dd9\\u0db1\\u0dc3\\u0dca \\u0d9a\\u0db8\\u0dca \\u0d9a\\u0dbb\\u0db1\\u0dc0\\u0dcf \\u0db8\\u0db1\\u0dca \\u0dad\\u0ddc\\u0da7 \\u0dc0\\u0ddb\\u0dbb\\u0dba\",\n \"\\u0d85\\u0db1\\u0dd4\\u0dbb\\u0d9c\\u0dd9 \\u0dc3\\u0da7\\u0dca\\u0da7\\u0dd0\\u0db9\\u0dd2\\u0dba\\u0db1\\u0dca\\u0d9c\\u0dd9 \\u0d89\\u0dad\\u0dca\\u0dad\\u0dd1\\u0dc0\\u0ddc \\u0daf\\u0dd4\\u0d91\\u0d85\\u0db1 \\u0d91\\u0d9a \\u0d85\\u0dc4\\u0db1\\u0dca\\u0db1\\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca\\u0daf \\u0dad\\u0dc0 \\u0db6\\u0dd0\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0da7\\u0dca\\u0d9c\\u0dc0 \\u0db8\\u0da9\\u0d9c\\u0dc4\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0d8b\\u0db9\\u0dbd\\u0d9c\\u0dd9 \\u0dc3\\u0d9a\\u0dca\\u0d9a\\u0dd2\\u0dbd\\u0dd2 \\u0db4\\u0dbb \\u0d9c\\u0dad\\u0dd2 \\u0dbd\\u0ddd\\u0d9a\\u0dd9\\u0da7\\u0db8 \\u0db4\\u0dd9\\u0db1\\u0dca\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd . 
% \\u0da7 \\u0d87\\u0daf\\u0dbd \\u0daf\\u0dd0\\u0db8\\u0dca\\u0db8\\u0dd9\\u0dad\\u0dca \\u0db8\\u0dda\\u0d9a\\u0dd9 \\u0d87\\u0db8\\u0db1\\u0dd9\\u0db1 \\u0dc3\\u0dd2\\u0db4\\u0dca\\u0db4\\u0dd2\\u0d9a\\u0da7\\u0dd4\",\n \"\\u0db8\\u0dbd\\u0dca\\u0dbd\\u0dd2\\u0d9c\\u0dd9 \\u0d8b\\u0db4\\u0db1\\u0dca\\u0daf\\u0dd2\\u0db1\\u0dda\\u0da7 \\u0dc4\\u0dd9\\u0da7 \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca \\u0d9a\\u0dbd\\u0dca\\u0db4\\u0db1\\u0dcf \\u0d9a\\u0dbb \\u0d9a\\u0dbb \\u0d89\\u0daf\\u0dca\\u0daf\\u0dd2 \\u0dbd\\u0ddc\\u0d9a\\u0dca\\u0d9a\\u0dcf \\u0daf\\u0dd4\\u0db1\\u0dca\\u0db1 \\u0dc3\\u0db4\\u0dca\\u200d\\u0dbb\\u0dcf\\u0dba\\u0dd2\\u0dc3\\u0dca \\u0d91\\u0d9a \\u0db1\\u0db8\\u0dca \\u0db4\\u0da7\\u0dca\\u0da7 . . \\u0dbb\\u0dad\\u0dd2\\u0da4\\u0dca\\u0da4\\u0dcf \\u0db4\\u0dd9\\u0da7\\u0dca\\u0da7\\u0dd2\\u0dba\\u0d9a\\u0dd2\\u0db1\\u0dca \\u0dc2\\u0dda\\u0db4\\u0dca \\u0d8b\\u0db1\\u0dd1\\u0d9a\\u0dd2 \\u0dc4\\u0dd0\\u0db8\\u0dd2\\u0db1\\u0dda\\u0dc2\\u0db1\\u0dca ourNation HappyBirthday\\u0d92\\u0d85\\u0dc3\\u0dca\\u0dc3\\u0dda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(len(df))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "zTRfsZ_tNLif",
+ "outputId": "d0012de3-5298-4be5-b280-dee66208a034"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "100\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "path = '/content/drive/MyDrive/sinhala-dataset'\n",
+ "testing_json = \"/testing.json\""
+ ],
+ "metadata": {
+ "id": "weZ2_xujNW1b"
+ },
+ "execution_count": 12,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.to_json(path + testing_json, orient='records')\n"
+ ],
+ "metadata": {
+ "id": "HzAfca0LNUDx"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 400,
+ "referenced_widgets": [
+ "1edd00396f2d45a7b32079d43bc62634",
+ "a96994a464df43918566f6cc967e7148",
+ "92143f1854c44349a3d0f6b7838b6a5c",
+ "b90793e5e29c435cab6fd7b1e059c992",
+ "8ce96d68c1e443b28e1200f106fefb02",
+ "dcfa2646664e449c98a00e89b2b7984d",
+ "28bcfe01e8a64ba08ce62e9715ad85e4",
+ "1b72e7f7e85a49fb8c7a79bce1989647",
+ "76523fc98b644aaaaf6c605544e9fffb",
+ "b2d2a9eabbe14cddaa7d0aa39e7a1953",
+ "bf4db198f72441b48a5dbff8515a1f91",
+ "48e8e488c27a4948a455835f6caf2ce2",
+ "2fb5a8ac30ed49df93056bc6802e8ee0",
+ "c29fcea40de347bf9f274f375b9123a5",
+ "00cd02215e1f4225a4cf93b46b9a7e15",
+ "e1064a6f8bfd4435a6ad15d08ff44699",
+ "956884e6e8bc43f4bc51a2b75c131889",
+ "4e23a4eec35f4f8ea3114d9cb0ea1e04",
+ "e7a3964adec34bf6b37f52cf1119fa9c",
+ "22a128f583aa4514a1e71d0f8aaf8e79",
+ "3f902b8cb652446c84609cd730a64e35",
+ "a78b7664a1e346f181b203bb1645eb9b",
+ "3bf7edeed06a4ef3b1ce28f24201c84b",
+ "205cbd07f34345c48b1e72bc2cb9a93b",
+ "a2d328e2313a49aab752cd2ba38220b5",
+ "a3696eb89c4e434683bb5416d91602db",
+ "e04dfc9c2e5f437c8dd9b15f33c04a4a",
+ "1f5b368654494327bfc8d1c315f13832",
+ "853eb13a56ec4fbf89e25f333798132e",
+ "5713250ff3864029a3668c6a7eb1f3e3",
+ "944ed217567144459ea5279c34f529f3",
+ "1e0faaa1a09f42f8964a3203472f50c2",
+ "7c3da939876e4a6f8f2969fbf96bbcd0",
+ "aacafd29b7b5403bb8a7df1ebe2a731e",
+ "ec944b4365c34ab6813af9d925e2a552",
+ "8d48d360da5945bfbf300ae455043c07",
+ "36f7f5ae075f4c59a44283e25088eaab",
+ "8cfa54ddcf354e0e9f71102656a744cf",
+ "edcb22216cac4bcf83a301d975f20d2d",
+ "3ace28614fe446f18268578e56b5ec14",
+ "3c8f3c862f744ddba9524079c636124a",
+ "36eb36dba13d4ad4a73b401e0dc22c42",
+ "236bb6124df1443684b16dd34fba2ed4",
+ "3264137a43cc4725948aab030421b24c",
+ "dd76625672d74095a0f691206646fbd8",
+ "a10c0e99afb546d79fca304e8a8e6ab1",
+ "6fa7911781ef42949e56c80dc1f85299",
+ "832e08b2b4524680bca9d71c363f3232",
+ "10ec65b4df86458ba2eee2eeccdc91e6",
+ "764921df2d7b438b8a8ad0d7b68b8b6e",
+ "12f7b959b26b472099ea4e06b606772a",
+ "32c6cb10b1e946a89a7b08505c1582e9",
+ "3ee11622c9a0405abf5f246720d358b2",
+ "d78ced95d2134e299573c1e8d712b3f8",
+ "6ff7b4b691a74f6ab232c26ee55b9982",
+ "2cc542103450405a853945ff07471932",
+ "5807fb12507f4fc0aaa4d083477aba27",
+ "11d2ce4ace194bb1825fefacf3cc36f0",
+ "b22724e628384993aa52c255c8bffc6f",
+ "e4dc10b2ef9c4a1f99689ebe9d48886a",
+ "6b580a221e2f48b5b058c6a6189cc99d",
+ "e5f30b22e01c4b74a2f20f3b9880d7ae",
+ "e353ffe187d94729a65453fcd3d8a9a3",
+ "b056d5ef8b9244828e10f685e38d47bb",
+ "32ce45ecf0d64c81bea12692ad52ed45",
+ "b930858da1a2407ba49a8a8a17a1fc70",
+ "d366271fc98943aa8fe3da314c1e95dc",
+ "364ab880ac304099b1bb83a92d6a7eed",
+ "4a8d5b1ca09d493c8b93baf92e7fd5ae",
+ "c6867a46aa064e26831be8a3a4278905",
+ "053fc43d2aad4aeeb114b8fd9aa2aef6",
+ "a9583dca84a14e9bb89e58a430c391e3",
+ "15b6e4ff685f4f5e8c997e151d2b4007",
+ "d73e215040114141bc60b0f58a1f8646",
+ "18ab1cf13c494b02af122ee0069e3c92",
+ "839cabb3e69c46549665769afcc24e08",
+ "56fe41cf83454378a9345ee4e9a26192"
+ ]
+ },
+ "id": "ROUE4LV1d_tp",
+ "outputId": "7f3d865e-97c6-434e-a8ae-f69a3462586a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:80: UserWarning: \n",
+ "Access to the secret `HF_TOKEN` has not been granted on this notebook.\n",
+ "You will not be requested again.\n",
+ "Please restart the session if you want to be prompted again.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/1.38k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "1edd00396f2d45a7b32079d43bc62634"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "vocab.json: 0%| | 0.00/1.50M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "48e8e488c27a4948a455835f6caf2ce2"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "merges.txt: 0%| | 0.00/1.15M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "3bf7edeed06a4ef3b1ce28f24201c84b"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/3.53M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "aacafd29b7b5403bb8a7df1ebe2a731e"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/957 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "dd76625672d74095a0f691206646fbd8"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "config.json: 0%| | 0.00/696 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "2cc542103450405a853945ff07471932"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "pytorch_model.bin: 0%| | 0.00/506M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "d366271fc98943aa8fe3da314c1e95dc"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
+ " return self.fget.__get__(instance, owner)()\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ],
+ "source": [
+ "# Checkpoint holding the trained weights that are restored below.\n",
+ "MODEL_SAVE_PATH = \"/content/drive/MyDrive/models-path-improvement/last-epoch-model-2024-03-08-15_34_03_6.pth\"\n",
+ "\n",
+ "# Tokenizer settings: maximum sequence length and pretrained BERT checkpoint name.\n",
+ "max_len = 160\n",
+ "bert = 'sinhala-nlp/sinbert-sold-si'\n",
+ "tokenizer = TokenizerFromPreTrained(max_len, bert)\n",
+ "\n",
+ "# Dataset: `path` and `testing_json` are defined outside this cell.\n",
+ "batch_size = 100\n",
+ "testing_path = path + testing_json\n",
+ "testing_set = TextDataset(testing_path, ['text'], tokenizer)\n",
+ "# shuffle=False so batches are produced in dataset order.\n",
+ "testing_loader = DataLoader(testing_set, batch_size, shuffle=False)\n",
+ "\n",
+ "# Model: rebuild the architecture, then load the saved weights mapped onto the CPU.\n",
+ "domain_num = 3\n",
+ "model = MDFEND(\n",
+ "    bert,\n",
+ "    domain_num,\n",
+ "    expert_num=18,\n",
+ "    mlp_dims=[5080, 4020, 3010, 2024, 1012, 606, 400],\n",
+ ")\n",
+ "model.load_state_dict(\n",
+ "    torch.load(f=MODEL_SAVE_PATH, map_location=torch.device('cpu'))\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Inference only: eval() switches layers such as dropout to inference behaviour,\n",
+ "# and no_grad() stops autograd from recording a graph for every batch.\n",
+ "model.eval()\n",
+ "\n",
+ "outputs = []  # one tensor of predicted probabilities per batch\n",
+ "with torch.no_grad():\n",
+ "    for batch_data in testing_loader:\n",
+ "        outputs.append(model.predict(batch_data))"
+ ],
+ "metadata": {
+ "id": "nsTmmtm7ENK7"
+ },
+ "execution_count": 15,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "outputs"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MgJFRW6uOTNK",
+ "outputId": "37176cb8-b2e6-4c3b-c852-c4e3a17cda30"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[tensor([1.3248e-03, 2.0616e-01, 4.5341e-02, 9.3156e-01, 2.3167e-01, 9.9967e-01,\n",
+ " 6.8980e-02, 8.8265e-01, 4.6962e-01, 1.4711e-01, 3.9079e-01, 1.5254e-02,\n",
+ " 1.4336e-01, 9.9974e-01, 9.4320e-02, 9.6368e-01, 3.0400e-01, 1.1099e-02,\n",
+ " 8.6662e-01, 9.0376e-02, 4.0686e-01, 9.9839e-01, 9.9700e-01, 4.9826e-02,\n",
+ " 9.6036e-01, 3.1445e-02, 7.8756e-01, 5.2800e-01, 9.4090e-01, 9.9148e-01,\n",
+ " 9.9725e-01, 1.6041e-02, 2.9223e-01, 1.5572e-01, 7.2350e-02, 8.2344e-02,\n",
+ " 5.4701e-03, 7.9817e-01, 1.6082e-03, 2.3789e-01, 2.0766e-02, 9.8514e-01,\n",
+ " 1.4062e-02, 9.8410e-01, 5.0685e-01, 1.0039e-01, 3.5957e-01, 4.6990e-01,\n",
+ " 6.0348e-01, 5.4888e-01, 9.7326e-02, 1.4647e-03, 2.0198e-02, 9.9995e-01,\n",
+ " 8.6098e-01, 7.3051e-01, 3.0538e-03, 9.9967e-01, 5.4075e-03, 2.4586e-02,\n",
+ " 2.1326e-01, 9.9988e-01, 7.7565e-01, 7.3468e-01, 8.2214e-02, 8.3052e-03,\n",
+ " 3.7278e-01, 3.6124e-01, 2.4839e-01, 2.4560e-01, 3.9281e-02, 9.9611e-01,\n",
+ " 2.3351e-02, 1.9584e-01, 1.1381e-01, 2.4559e-01, 6.5344e-01, 3.5736e-01,\n",
+ " 8.6219e-04, 3.8071e-01, 5.6490e-01, 2.5499e-02, 6.1897e-02, 9.0802e-01,\n",
+ " 8.2842e-02, 8.5183e-04, 6.4453e-01, 6.0612e-01, 3.8544e-01, 4.0832e-02,\n",
+ " 6.0973e-01, 4.7808e-02, 7.0927e-01, 8.4603e-01, 5.6889e-01, 5.3337e-01,\n",
+ " 1.2113e-01, 8.6022e-01, 8.5642e-03, 9.9990e-01],\n",
+ " grad_fn=)]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Turn every predicted probability into a hard class label:\n",
+ "#   1 ====> offensive      (probability >= 0.5)\n",
+ "#   0 ====> not offensive\n",
+ "label = [\n",
+ "    1 if prob.item() >= 0.5 else 0\n",
+ "    for batch_probs in outputs\n",
+ "    for prob in batch_probs\n",
+ "]\n"
+ ],
+ "metadata": {
+ "id": "ySdut6vMOvQY"
+ },
+ "execution_count": 20,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "label"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8thgk3ykPTOD",
+ "outputId": "fb05dd91-01d4-44a8-ef66-caf0eda24831"
+ },
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1,\n",
+ " 0,\n",
+ " 1]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "P4eIsw1DPYQG"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file