{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "1edd00396f2d45a7b32079d43bc62634": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a96994a464df43918566f6cc967e7148", "IPY_MODEL_92143f1854c44349a3d0f6b7838b6a5c", "IPY_MODEL_b90793e5e29c435cab6fd7b1e059c992" ], "layout": "IPY_MODEL_8ce96d68c1e443b28e1200f106fefb02" } }, "a96994a464df43918566f6cc967e7148": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dcfa2646664e449c98a00e89b2b7984d", "placeholder": "​", "style": "IPY_MODEL_28bcfe01e8a64ba08ce62e9715ad85e4", "value": "tokenizer_config.json: 100%" } }, "92143f1854c44349a3d0f6b7838b6a5c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_1b72e7f7e85a49fb8c7a79bce1989647", "max": 1375, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_76523fc98b644aaaaf6c605544e9fffb", "value": 1375 } }, "b90793e5e29c435cab6fd7b1e059c992": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b2d2a9eabbe14cddaa7d0aa39e7a1953", "placeholder": "​", "style": "IPY_MODEL_bf4db198f72441b48a5dbff8515a1f91", "value": " 1.38k/1.38k [00:00<00:00, 36.2kB/s]" } }, "8ce96d68c1e443b28e1200f106fefb02": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"dcfa2646664e449c98a00e89b2b7984d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "28bcfe01e8a64ba08ce62e9715ad85e4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1b72e7f7e85a49fb8c7a79bce1989647": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "76523fc98b644aaaaf6c605544e9fffb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b2d2a9eabbe14cddaa7d0aa39e7a1953": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bf4db198f72441b48a5dbff8515a1f91": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "48e8e488c27a4948a455835f6caf2ce2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2fb5a8ac30ed49df93056bc6802e8ee0", "IPY_MODEL_c29fcea40de347bf9f274f375b9123a5", "IPY_MODEL_00cd02215e1f4225a4cf93b46b9a7e15" ], "layout": "IPY_MODEL_e1064a6f8bfd4435a6ad15d08ff44699" } }, "2fb5a8ac30ed49df93056bc6802e8ee0": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_956884e6e8bc43f4bc51a2b75c131889", "placeholder": "​", "style": "IPY_MODEL_4e23a4eec35f4f8ea3114d9cb0ea1e04", "value": "vocab.json: 
100%" } }, "c29fcea40de347bf9f274f375b9123a5": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e7a3964adec34bf6b37f52cf1119fa9c", "max": 1500217, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_22a128f583aa4514a1e71d0f8aaf8e79", "value": 1500217 } }, "00cd02215e1f4225a4cf93b46b9a7e15": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3f902b8cb652446c84609cd730a64e35", "placeholder": "​", "style": "IPY_MODEL_a78b7664a1e346f181b203bb1645eb9b", "value": " 1.50M/1.50M [00:00<00:00, 9.80MB/s]" } }, "e1064a6f8bfd4435a6ad15d08ff44699": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, 
"grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "956884e6e8bc43f4bc51a2b75c131889": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4e23a4eec35f4f8ea3114d9cb0ea1e04": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e7a3964adec34bf6b37f52cf1119fa9c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "22a128f583aa4514a1e71d0f8aaf8e79": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3f902b8cb652446c84609cd730a64e35": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a78b7664a1e346f181b203bb1645eb9b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3bf7edeed06a4ef3b1ce28f24201c84b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_205cbd07f34345c48b1e72bc2cb9a93b", "IPY_MODEL_a2d328e2313a49aab752cd2ba38220b5", "IPY_MODEL_a3696eb89c4e434683bb5416d91602db" ], "layout": "IPY_MODEL_e04dfc9c2e5f437c8dd9b15f33c04a4a" } }, "205cbd07f34345c48b1e72bc2cb9a93b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1f5b368654494327bfc8d1c315f13832", "placeholder": "​", "style": "IPY_MODEL_853eb13a56ec4fbf89e25f333798132e", "value": "merges.txt: 100%" } }, "a2d328e2313a49aab752cd2ba38220b5": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5713250ff3864029a3668c6a7eb1f3e3", "max": 1146413, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_944ed217567144459ea5279c34f529f3", "value": 1146413 } }, "a3696eb89c4e434683bb5416d91602db": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1e0faaa1a09f42f8964a3203472f50c2", "placeholder": "​", "style": "IPY_MODEL_7c3da939876e4a6f8f2969fbf96bbcd0", "value": " 1.15M/1.15M [00:00<00:00, 14.1MB/s]" } }, "e04dfc9c2e5f437c8dd9b15f33c04a4a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { 
"_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1f5b368654494327bfc8d1c315f13832": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": 
null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "853eb13a56ec4fbf89e25f333798132e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5713250ff3864029a3668c6a7eb1f3e3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "944ed217567144459ea5279c34f529f3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1e0faaa1a09f42f8964a3203472f50c2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7c3da939876e4a6f8f2969fbf96bbcd0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "aacafd29b7b5403bb8a7df1ebe2a731e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ec944b4365c34ab6813af9d925e2a552", "IPY_MODEL_8d48d360da5945bfbf300ae455043c07", "IPY_MODEL_36f7f5ae075f4c59a44283e25088eaab" ], "layout": "IPY_MODEL_8cfa54ddcf354e0e9f71102656a744cf" } }, "ec944b4365c34ab6813af9d925e2a552": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_edcb22216cac4bcf83a301d975f20d2d", "placeholder": "​", "style": "IPY_MODEL_3ace28614fe446f18268578e56b5ec14", "value": "tokenizer.json: 100%" } }, "8d48d360da5945bfbf300ae455043c07": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3c8f3c862f744ddba9524079c636124a", "max": 3529879, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_36eb36dba13d4ad4a73b401e0dc22c42", "value": 3529879 } }, "36f7f5ae075f4c59a44283e25088eaab": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_236bb6124df1443684b16dd34fba2ed4", "placeholder": "​", "style": "IPY_MODEL_3264137a43cc4725948aab030421b24c", "value": " 3.53M/3.53M [00:00<00:00, 22.8MB/s]" } }, "8cfa54ddcf354e0e9f71102656a744cf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "edcb22216cac4bcf83a301d975f20d2d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": 
null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3ace28614fe446f18268578e56b5ec14": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3c8f3c862f744ddba9524079c636124a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "36eb36dba13d4ad4a73b401e0dc22c42": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "236bb6124df1443684b16dd34fba2ed4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3264137a43cc4725948aab030421b24c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dd76625672d74095a0f691206646fbd8": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a10c0e99afb546d79fca304e8a8e6ab1", "IPY_MODEL_6fa7911781ef42949e56c80dc1f85299", "IPY_MODEL_832e08b2b4524680bca9d71c363f3232" ], "layout": "IPY_MODEL_10ec65b4df86458ba2eee2eeccdc91e6" } }, "a10c0e99afb546d79fca304e8a8e6ab1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_764921df2d7b438b8a8ad0d7b68b8b6e", "placeholder": "​", "style": "IPY_MODEL_12f7b959b26b472099ea4e06b606772a", "value": "special_tokens_map.json: 100%" } }, "6fa7911781ef42949e56c80dc1f85299": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_32c6cb10b1e946a89a7b08505c1582e9", "max": 957, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3ee11622c9a0405abf5f246720d358b2", "value": 957 } }, "832e08b2b4524680bca9d71c363f3232": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d78ced95d2134e299573c1e8d712b3f8", "placeholder": "​", "style": "IPY_MODEL_6ff7b4b691a74f6ab232c26ee55b9982", "value": " 957/957 [00:00<00:00, 43.5kB/s]" } }, "10ec65b4df86458ba2eee2eeccdc91e6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"764921df2d7b438b8a8ad0d7b68b8b6e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12f7b959b26b472099ea4e06b606772a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "32c6cb10b1e946a89a7b08505c1582e9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3ee11622c9a0405abf5f246720d358b2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d78ced95d2134e299573c1e8d712b3f8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6ff7b4b691a74f6ab232c26ee55b9982": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2cc542103450405a853945ff07471932": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_5807fb12507f4fc0aaa4d083477aba27", "IPY_MODEL_11d2ce4ace194bb1825fefacf3cc36f0", "IPY_MODEL_b22724e628384993aa52c255c8bffc6f" ], "layout": "IPY_MODEL_e4dc10b2ef9c4a1f99689ebe9d48886a" } }, "5807fb12507f4fc0aaa4d083477aba27": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6b580a221e2f48b5b058c6a6189cc99d", "placeholder": "​", "style": "IPY_MODEL_e5f30b22e01c4b74a2f20f3b9880d7ae", "value": "config.json: 
100%" } }, "11d2ce4ace194bb1825fefacf3cc36f0": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e353ffe187d94729a65453fcd3d8a9a3", "max": 696, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b056d5ef8b9244828e10f685e38d47bb", "value": 696 } }, "b22724e628384993aa52c255c8bffc6f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_32ce45ecf0d64c81bea12692ad52ed45", "placeholder": "​", "style": "IPY_MODEL_b930858da1a2407ba49a8a8a17a1fc70", "value": " 696/696 [00:00<00:00, 35.8kB/s]" } }, "e4dc10b2ef9c4a1f99689ebe9d48886a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": 
null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b580a221e2f48b5b058c6a6189cc99d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e5f30b22e01c4b74a2f20f3b9880d7ae": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "StyleView", "description_width": "" } }, "e353ffe187d94729a65453fcd3d8a9a3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b056d5ef8b9244828e10f685e38d47bb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "32ce45ecf0d64c81bea12692ad52ed45": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b930858da1a2407ba49a8a8a17a1fc70": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d366271fc98943aa8fe3da314c1e95dc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_364ab880ac304099b1bb83a92d6a7eed", "IPY_MODEL_4a8d5b1ca09d493c8b93baf92e7fd5ae", "IPY_MODEL_c6867a46aa064e26831be8a3a4278905" ], "layout": "IPY_MODEL_053fc43d2aad4aeeb114b8fd9aa2aef6" } }, "364ab880ac304099b1bb83a92d6a7eed": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": 
"1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9583dca84a14e9bb89e58a430c391e3", "placeholder": "​", "style": "IPY_MODEL_15b6e4ff685f4f5e8c997e151d2b4007", "value": "pytorch_model.bin: 100%" } }, "4a8d5b1ca09d493c8b93baf92e7fd5ae": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d73e215040114141bc60b0f58a1f8646", "max": 506353257, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_18ab1cf13c494b02af122ee0069e3c92", "value": 506353257 } }, "c6867a46aa064e26831be8a3a4278905": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_839cabb3e69c46549665769afcc24e08", "placeholder": "​", "style": "IPY_MODEL_56fe41cf83454378a9345ee4e9a26192", "value": " 506M/506M [00:09<00:00, 57.3MB/s]" } }, "053fc43d2aad4aeeb114b8fd9aa2aef6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a9583dca84a14e9bb89e58a430c391e3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, 
"overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "15b6e4ff685f4f5e8c997e151d2b4007": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d73e215040114141bc60b0f58a1f8646": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "18ab1cf13c494b02af122ee0069e3c92": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": 
null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "839cabb3e69c46549665769afcc24e08": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "56fe41cf83454378a9345ee4e9a26192": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EWlgdx43A_NL", "outputId": "a2431529-0c26-4076-a6cf-4c9146c4f9b0" }, "outputs": [ { "output_type": "stream", "name": "stdout", 
"text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "source": [ "! pip install faknow\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7dssDha5BGNL", "outputId": "5d303d54-6c32-482d-83e7-c46a4506cbce" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting faknow\n", " Downloading faknow-0.0.3-py3-none-any.whl (147 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.6/147.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n", "Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n", "Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n", "Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n", "Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n", "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n", "Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n", "Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n", "Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n", "Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n", "Collecting sphinx-markdown-tables>=0.0.17 (from faknow)\n", " Downloading sphinx_markdown_tables-0.0.17-py3-none-any.whl (28 kB)\n", 
"Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (1.11.4)\n", "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.3.0)\n", "Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.5.2)\n", "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n", "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.62.0)\n", "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.27.0)\n", "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n", "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.20.3)\n", "Requirement already satisfied: requests<3,>=2.21.0 in 
/usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.31.0)\n", "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (67.7.2)\n", "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.16.0)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.20.3)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (6.0.1)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.15.2)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (5.3.3)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.3.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (4.9)\n", 
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (1.3.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (4.10.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2024.2.2)\n", "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n", "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.5.1)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (3.2.2)\n", "Installing collected packages: sphinx-markdown-tables, faknow\n", "Successfully installed faknow-0.0.3 sphinx-markdown-tables-0.0.17\n" ] } ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "Zo3_tdxod_tn" }, "outputs": [], "source": [ "from typing import 
# %% Model definition: MDFEND and its building blocks
from typing import List, Optional, Tuple

import torch
from torch import Tensor
from torch import nn
from transformers import RobertaModel

from faknow.model.layers.layer import TextCNNLayer
from faknow.model.model import AbstractModel
import pandas as pd


class _MLP(nn.Module):
    """
    Feed-forward head made of repeated Linear -> BatchNorm1d -> ReLU -> Dropout
    blocks, optionally followed by a final Linear layer with a single output.
    """
    def __init__(self,
                 input_dim: int,
                 embed_dims: List[int],
                 dropout_rate: float,
                 output_layer=True):
        """

        Args:
            input_dim (int): size of the incoming feature vector
            embed_dims (List[int]): output size of every hidden block, in order
            dropout_rate (float): dropout probability used in every hidden block
            output_layer (bool): whether to append a final Linear(., 1) layer, default=True
        """
        super().__init__()
        layers = []
        for embed_dim in embed_dims:
            # the order of the sub-modules fixes the state_dict keys (mlp.0, mlp.1, ...)
            layers.extend([
                nn.Linear(input_dim, embed_dim),
                nn.BatchNorm1d(embed_dim),
                nn.ReLU(),
                nn.Dropout(p=dropout_rate),
            ])
            input_dim = embed_dim
        if output_layer:
            layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """

        Args:
            x (Tensor): shared feature from domain and text, shape=(batch_size, embed_dim)

        Returns:
            Tensor: output of the stacked layers, shape=(batch_size, 1) when the
                output layer is enabled
        """
        return self.mlp(x)


class _MaskAttentionLayer(nn.Module):
    """
    Attention pooling over the sequence axis: every position is scored with a
    single Linear layer, masked positions are excluded, and the inputs are
    averaged with the softmax-normalised scores.
    """
    def __init__(self, input_size: int):
        """

        Args:
            input_size (int): size of the last dimension of the inputs
        """
        super(_MaskAttentionLayer, self).__init__()
        self.attention_layer = nn.Linear(input_size, 1)

    def forward(self,
                inputs: Tensor,
                mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
        """

        Args:
            inputs (Tensor): shape=(batch_size, seq_len, input_size)
            mask (Optional[Tensor]): shape=(batch_size, seq_len); positions equal
                to 0 receive no attention. default=None (attend to everything)

        Returns:
            Tuple[Tensor, Tensor]: pooled feature, shape=(batch_size, input_size),
                and attention weights, shape=(batch_size, 1, seq_len)
        """
        weights = self.attention_layer(inputs).view(-1, inputs.size(1))
        if mask is not None:
            # -inf turns into a weight of exactly 0 after the softmax
            weights = weights.masked_fill(mask == 0, float("-inf"))
        weights = torch.softmax(weights, dim=-1).unsqueeze(1)
        outputs = torch.matmul(weights, inputs).squeeze(1)
        return outputs, weights


class MDFEND(AbstractModel):
    r"""
    MDFEND: Multi-domain Fake News Detection, CIKM 2021
    paper: https://dl.acm.org/doi/10.1145/3459637.3482139
    code: https://github.com/kennqiang/MDFEND-Weibo21
    """
    def __init__(self,
                 pre_trained_bert_name: str,
                 domain_num: int,
                 mlp_dims: Optional[List[int]] = None,
                 dropout_rate=0.2,
                 expert_num=5):
        """

        Args:
            pre_trained_bert_name (str): the name or local path of the pre-trained
                model, loaded through RobertaModel and kept frozen
            domain_num (int): total number of all domains
            mlp_dims (List[int]): a list of the dimensions in MLP layer, if None, [384] will be taken as default
            dropout_rate (float): rate of Dropout layer, default=0.2
            expert_num (int): number of experts also called TextCNNLayer, default=5
        """
        super(MDFEND, self).__init__()
        self.domain_num = domain_num
        self.expert_num = expert_num
        self.bert = RobertaModel.from_pretrained(
            pre_trained_bert_name).requires_grad_(False)
        self.embedding_size = self.bert.config.hidden_size
        self.loss_func = nn.BCELoss()
        if mlp_dims is None:
            mlp_dims = [384]

        filter_num = 64
        filter_sizes = [1, 2, 3, 5, 10]
        experts = [
            TextCNNLayer(self.embedding_size, filter_num, filter_sizes)
            for _ in range(self.expert_num)
        ]
        self.experts = nn.ModuleList(experts)

        self.gate = nn.Sequential(
            nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),
            nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))

        self.attention = _MaskAttentionLayer(self.embedding_size)

        self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,
                                            embedding_dim=self.embedding_size)

        # Width of one expert's output: filter_num features per filter size
        # (64 * 5 = 320, the value that used to be hard-coded here), so the
        # classifier stays in sync if the filter settings above are edited.
        expert_output_dim = filter_num * len(filter_sizes)
        self.classifier = _MLP(expert_output_dim, mlp_dims, dropout_rate)

    def forward(self, token_id: Tensor, mask: Tensor,
                domain: Tensor) -> Tensor:
        """

        Args:
            token_id (Tensor): token ids from bert tokenizer, shape=(batch_size, max_len)
            mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)
            domain (Tensor): domain id, shape=(batch_size,)

        Returns:
            FloatTensor: the prediction of being fake, shape=(batch_size,)
        """
        text_embedding = self.bert(token_id,
                                   attention_mask=mask).last_hidden_state
        attention_feature, _ = self.attention(text_embedding, mask)

        domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)

        # the gate sees both the domain and a pooled summary of the text
        gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)
        gate_output = self.gate(gate_input)

        # mixture of experts: weight every expert's feature by its gate value
        shared_feature = 0
        for i, expert in enumerate(self.experts):
            expert_feature = expert(text_embedding)
            shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))

        label_pred = self.classifier(shared_feature)

        return torch.sigmoid(label_pred.squeeze(1))

    def calculate_loss(self, data) -> Tensor:
        """
        calculate loss via BCELoss

        Args:
            data (dict): batch data dict with keys 'text' (itself holding
                'token_id' and 'mask'), 'domain' and 'label'

        Returns:
            loss (Tensor): loss value
        """

        token_ids = data['text']['token_id']
        masks = data['text']['mask']
        domains = data['domain']
        labels = data['label']
        output = self.forward(token_ids, masks, domains)
        return self.loss_func(output, labels.float())

    def predict(self, data_without_label) -> Tensor:
        """
        predict the probability of being fake news

        Args:
            data_without_label (Dict[str, Any]): batch data dict with keys
                'text' (itself holding 'token_id' and 'mask') and 'domain'

        Returns:
            Tensor: probability of the positive class, shape=(batch_size,).
                This is the output of ``forward`` unchanged; it is NOT expanded
                into a two-column (batch_size, 2) tensor.
        """

        token_ids = data_without_label['text']['token_id']
        masks = data_without_label['text']['mask']
        domains = data_without_label['domain']

        output_prob = self.forward(token_ids, masks, domains)

        return output_prob
# %% Imports used by the data-loading / inference cells
from faknow.data.dataset.text import TextDataset
from faknow.data.process.text_process import TokenizerFromPreTrained
from faknow.evaluate.evaluator import Evaluator

import torch
from torch.utils.data import DataLoader

# %% Location of the raw test split (presumably on a mounted Google Drive)
testing_path = "/content/drive/MyDrive/sinhala-dataset/test_data.json"

# %% Read the test split into a DataFrame and preview the first rows.
# `pd` comes from the import block of the model-definition cell.
df = pd.read_json(path_or_buf=testing_path)
df.head(n=5)
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ01
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...01
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5000,\n \"samples\": [\n \"\\u0d87\\u0dba\\u0dd2 \\u0dba\\u0d9a\\u0ddd \\u0dbd\\u0d82\\u0d9a\\u0dcf\\u0dc0\\u0dda \\u0db6\\u0dd9\\u0dbb\\u0dd2 ! \\u0daf\\u0dd3\\u0db4\\u0dbd\\u0dca\\u0dbd\\u0dcf # \\u0dc0\\u0dd9\\u0dbb\\u0dd2 # \\u0d9c\\u0dbd\\u0dca\\u0db6\\u0ddd\\u0dad\\u0dbd\\u0dca # GenElecSL # SriLanka # TamilNadu # Election2015\",\n \"@USER @USER \\u0d9a\\u0dcf\\u0dbd\\u0d9a\\u0db1\\u0dca\\u0db1\\u0dd2 \\u0d95\\u0d9a\\u0dd4\\u0db1\\u0d9c\\u0dd9 \\u0db4\\u0dd4\\u0d9a \\u0db8\\u0dc4\\u0dbd\\u0dcf \\u0db6\\u0da9\\u0dba\\u0db1\\u0dca\\u0db1 \\u0db6\\u0dd9\\u0dc4\\u0dd9\\u0dad\\u0dca \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0d95\\u0db1\\u0dcf\",\n \"\\u0dc3\\u0dd3\\u0dbd\\u0dcf\\u0dc0\\u0dad\\u0dd4\\u0dbb \\u0db1\\u0dcf / \\u0dc3\\u0dd9\\u0db6\\u0dc5\\u0dd4 \\u0dad\\u0dd2\\u0daf\\u0dd9\\u0db1\\u0d9a\\u0dd4\\u0da7 \\u0db4\\u0dc4\\u0dbb\\u0daf\\u0dd3 \\u0dc3\\u0dd9\\u0db6\\u0dbd\\u0dd9\\u0d9a\\u0dca \\u0dc4\\u0dd2\\u0dbb\\u0d9a\\u0dbb\\u0dba\\u0dd2 \\u0db6\\u0dda\\u0dbb\\u0dcf\\u0d9c\\u0dad\\u0dca\\u0dad\\u0dd9 \\u0db1\\u0dd3\\u0dad\\u0dd2\\u0db8\\u0dba \\u0db4\\u0dd2\\u0dba\\u0dc0\\u0dbb \\u0db1\\u0ddc\\u0d9c\\u0db1\\u0dca\\u0db1 \\u0db4\\u0ddc\\u0dbb\\u0ddc\\u0db1\\u0dca\\u0daf\\u0dd4\\u0dc0 \\u0db8\\u0dad\\u0dba \\u0db1\\u0ddc\\u0daf\\u0d9a\\u0dd2\\u0db1\\u0dca URL via @USER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n 
# %% Keep only the first 100 rows for a quick inference run.
# .copy() detaches the sample from the full frame, so the column assignment
# below writes to an independent DataFrame instead of a slice of the original
# (assigning on a bare slice is the pattern behind SettingWithCopyWarning).
df = df[:100].copy()

# %% Overwrite the labels with a constant 0.
# NOTE(review): presumably a placeholder so the file still has a `label`
# column while the true labels are hidden from the model - confirm.
df["label"] = 0

# %% Preview the prepared sample
df.head()
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ00
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...00
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"\\u0d89\\u0dc3\\u0dca\\u0dc3\\u0dbb \\u0d8b\\u0db6 \\u0dc0\\u0dbd\\u0dd2\\u0dba\\u0d9a\\u0dca \\u0db1\\u0db8\\u0dca \\u0dbd\\u0ddc\\u0dc0\\u0dd9\\u0dad\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0ddc \\u0d9c\\u0dd2\\u0dc4\\u0dcf\\u0db1\\u0dca \\u0d85\\u0dba\\u0dd2\\u0dba\\u0dcf \\u0d89\\u0d9a\\u0dca\\u0db8\\u0db1\\u0da7 \\u0dc0\\u0dbb\\u0dd9\\u0db1\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0dbd\\u0dcf \\u0d85\\u0da9 \\u0d9c\\u0dc4\\u0dbd\\u0dcf \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0dca\\u0db1\\u0dda \\u0dad\\u0ddc \\u0daf\\u0dd0\\u0db1\\u0dca \\u0dc0\\u0dd9\\u0db1\\u0dc3\\u0dca \\u0d9a\\u0db8\\u0dca \\u0d9a\\u0dbb\\u0db1\\u0dc0\\u0dcf \\u0db8\\u0db1\\u0dca \\u0dad\\u0ddc\\u0da7 \\u0dc0\\u0ddb\\u0dbb\\u0dba\",\n \"\\u0d85\\u0db1\\u0dd4\\u0dbb\\u0d9c\\u0dd9 \\u0dc3\\u0da7\\u0dca\\u0da7\\u0dd0\\u0db9\\u0dd2\\u0dba\\u0db1\\u0dca\\u0d9c\\u0dd9 \\u0d89\\u0dad\\u0dca\\u0dad\\u0dd1\\u0dc0\\u0ddc \\u0daf\\u0dd4\\u0d91\\u0d85\\u0db1 \\u0d91\\u0d9a \\u0d85\\u0dc4\\u0db1\\u0dca\\u0db1\\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca\\u0daf \\u0dad\\u0dc0 \\u0db6\\u0dd0\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0da7\\u0dca\\u0d9c\\u0dc0 \\u0db8\\u0da9\\u0d9c\\u0dc4\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0d8b\\u0db9\\u0dbd\\u0d9c\\u0dd9 \\u0dc3\\u0d9a\\u0dca\\u0d9a\\u0dd2\\u0dbd\\u0dd2 \\u0db4\\u0dbb \\u0d9c\\u0dad\\u0dd2 \\u0dbd\\u0ddd\\u0d9a\\u0dd9\\u0da7\\u0db8 \\u0db4\\u0dd9\\u0db1\\u0dca\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd . 
# %% Sanity check: number of rows that will be exported
print(len(df.index))

# %% Output location; `testing_json` carries its own leading slash, so the two
# parts are joined by plain concatenation.
path = "/content/drive/MyDrive/sinhala-dataset"
testing_json = "/testing.json"

# %% Write the sample as a JSON array with one object per row
df.to_json(f"{path}{testing_json}", orient="records")
"36eb36dba13d4ad4a73b401e0dc22c42", "236bb6124df1443684b16dd34fba2ed4", "3264137a43cc4725948aab030421b24c", "dd76625672d74095a0f691206646fbd8", "a10c0e99afb546d79fca304e8a8e6ab1", "6fa7911781ef42949e56c80dc1f85299", "832e08b2b4524680bca9d71c363f3232", "10ec65b4df86458ba2eee2eeccdc91e6", "764921df2d7b438b8a8ad0d7b68b8b6e", "12f7b959b26b472099ea4e06b606772a", "32c6cb10b1e946a89a7b08505c1582e9", "3ee11622c9a0405abf5f246720d358b2", "d78ced95d2134e299573c1e8d712b3f8", "6ff7b4b691a74f6ab232c26ee55b9982", "2cc542103450405a853945ff07471932", "5807fb12507f4fc0aaa4d083477aba27", "11d2ce4ace194bb1825fefacf3cc36f0", "b22724e628384993aa52c255c8bffc6f", "e4dc10b2ef9c4a1f99689ebe9d48886a", "6b580a221e2f48b5b058c6a6189cc99d", "e5f30b22e01c4b74a2f20f3b9880d7ae", "e353ffe187d94729a65453fcd3d8a9a3", "b056d5ef8b9244828e10f685e38d47bb", "32ce45ecf0d64c81bea12692ad52ed45", "b930858da1a2407ba49a8a8a17a1fc70", "d366271fc98943aa8fe3da314c1e95dc", "364ab880ac304099b1bb83a92d6a7eed", "4a8d5b1ca09d493c8b93baf92e7fd5ae", "c6867a46aa064e26831be8a3a4278905", "053fc43d2aad4aeeb114b8fd9aa2aef6", "a9583dca84a14e9bb89e58a430c391e3", "15b6e4ff685f4f5e8c997e151d2b4007", "d73e215040114141bc60b0f58a1f8646", "18ab1cf13c494b02af122ee0069e3c92", "839cabb3e69c46549665769afcc24e08", "56fe41cf83454378a9345ee4e9a26192" ] }, "id": "ROUE4LV1d_tp", "outputId": "7f3d865e-97c6-434e-a8ae-f69a3462586a" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:80: UserWarning: \n", "Access to the secret `HF_TOKEN` has not been granted on this notebook.\n", "You will not be requested again.\n", "Please restart the session if you want to be prompted again.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "tokenizer_config.json: 0%| | 0.00/1.38k [00:00" ] }, "metadata": {}, "execution_count": 14 } ], "source": [ "MODEL_SAVE_PATH = 
# %% Load the tokenizer, the inference data and the fine-tuned checkpoint
MODEL_SAVE_PATH = "/content/drive/MyDrive/models-path-improvement/last-epoch-model-2024-03-08-15_34_03_6.pth"

max_len = 160
bert = 'sinhala-nlp/sinbert-sold-si'
tokenizer = TokenizerFromPreTrained(max_len, bert)

# dataset
batch_size = 100

# NOTE: this rebinds `testing_path` from the raw test split to the sample
# file exported above.
testing_path = path + testing_json

testing_set = TextDataset(testing_path, ['text'], tokenizer)
testing_loader = DataLoader(testing_set, batch_size, shuffle=False)

# prepare model: the architecture arguments must match the ones the
# checkpoint was trained with, otherwise load_state_dict fails on shape/key
# mismatches
domain_num = 3

model = MDFEND(bert,
               domain_num,
               expert_num=18,
               mlp_dims=[5080, 4020, 3010, 2024, 1012, 606, 400])
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load(f=MODEL_SAVE_PATH, map_location=torch.device('cpu')))

# %% Run inference over the whole loader.
# A freshly built nn.Module is in training mode: Dropout would randomly zero
# activations and BatchNorm1d would normalise with the statistics of the
# current batch, making predictions noisy and batch-dependent. eval() switches
# both to their inference behaviour; no_grad() skips building the autograd
# graph, which is not needed for prediction.
model.eval()
with torch.no_grad():
    outputs = [model.predict(batch_data) for batch_data in testing_loader]

# %% One tensor of probabilities per batch
outputs
# %% Turn the predicted probabilities into hard labels.
# 1 ====> offensive
# 0 ====> not offensive
# Every batch tensor is walked element by element; a probability of 0.5 or
# more maps to 1, anything lower to 0.
label = [
    1 if prob.item() >= 0.5 else 0
    for batch_probs in outputs
    for prob in batch_probs
]

# %% Flat list of predicted labels, in dataset order
label
0,\n", " 1,\n", " 0,\n", " 0,\n", " 1,\n", " 1,\n", " 0,\n", " 0,\n", " 1,\n", " 0,\n", " 1,\n", " 1,\n", " 1,\n", " 1,\n", " 0,\n", " 1,\n", " 0,\n", " 1]" ] }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "P4eIsw1DPYQG" }, "execution_count": null, "outputs": [] } ] }