diff --git "a/prediction_sinhala.ipynb" "b/prediction_sinhala.ipynb" deleted file mode 100644--- "a/prediction_sinhala.ipynb" +++ /dev/null @@ -1,3865 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "1edd00396f2d45a7b32079d43bc62634": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a96994a464df43918566f6cc967e7148", - "IPY_MODEL_92143f1854c44349a3d0f6b7838b6a5c", - "IPY_MODEL_b90793e5e29c435cab6fd7b1e059c992" - ], - "layout": "IPY_MODEL_8ce96d68c1e443b28e1200f106fefb02" - } - }, - "a96994a464df43918566f6cc967e7148": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dcfa2646664e449c98a00e89b2b7984d", - "placeholder": "​", - "style": "IPY_MODEL_28bcfe01e8a64ba08ce62e9715ad85e4", - "value": "tokenizer_config.json: 100%" - } - }, - "92143f1854c44349a3d0f6b7838b6a5c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b72e7f7e85a49fb8c7a79bce1989647", - "max": 1375, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_76523fc98b644aaaaf6c605544e9fffb", - "value": 1375 - } - }, - "b90793e5e29c435cab6fd7b1e059c992": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b2d2a9eabbe14cddaa7d0aa39e7a1953", - "placeholder": "​", - "style": "IPY_MODEL_bf4db198f72441b48a5dbff8515a1f91", - "value": " 1.38k/1.38k [00:00<00:00, 36.2kB/s]" - } - }, - "8ce96d68c1e443b28e1200f106fefb02": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dcfa2646664e449c98a00e89b2b7984d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "28bcfe01e8a64ba08ce62e9715ad85e4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "1b72e7f7e85a49fb8c7a79bce1989647": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "76523fc98b644aaaaf6c605544e9fffb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b2d2a9eabbe14cddaa7d0aa39e7a1953": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bf4db198f72441b48a5dbff8515a1f91": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "48e8e488c27a4948a455835f6caf2ce2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2fb5a8ac30ed49df93056bc6802e8ee0", - "IPY_MODEL_c29fcea40de347bf9f274f375b9123a5", - "IPY_MODEL_00cd02215e1f4225a4cf93b46b9a7e15" - ], - "layout": "IPY_MODEL_e1064a6f8bfd4435a6ad15d08ff44699" - } - }, - "2fb5a8ac30ed49df93056bc6802e8ee0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_956884e6e8bc43f4bc51a2b75c131889", - "placeholder": "​", - "style": "IPY_MODEL_4e23a4eec35f4f8ea3114d9cb0ea1e04", - "value": "vocab.json: 100%" - } - }, - "c29fcea40de347bf9f274f375b9123a5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e7a3964adec34bf6b37f52cf1119fa9c", - "max": 1500217, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_22a128f583aa4514a1e71d0f8aaf8e79", - "value": 1500217 - } - }, - "00cd02215e1f4225a4cf93b46b9a7e15": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3f902b8cb652446c84609cd730a64e35", - "placeholder": "​", - "style": "IPY_MODEL_a78b7664a1e346f181b203bb1645eb9b", - "value": " 1.50M/1.50M [00:00<00:00, 9.80MB/s]" - } - }, - "e1064a6f8bfd4435a6ad15d08ff44699": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "956884e6e8bc43f4bc51a2b75c131889": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4e23a4eec35f4f8ea3114d9cb0ea1e04": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e7a3964adec34bf6b37f52cf1119fa9c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "22a128f583aa4514a1e71d0f8aaf8e79": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3f902b8cb652446c84609cd730a64e35": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a78b7664a1e346f181b203bb1645eb9b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3bf7edeed06a4ef3b1ce28f24201c84b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_205cbd07f34345c48b1e72bc2cb9a93b", - "IPY_MODEL_a2d328e2313a49aab752cd2ba38220b5", - "IPY_MODEL_a3696eb89c4e434683bb5416d91602db" - ], - "layout": "IPY_MODEL_e04dfc9c2e5f437c8dd9b15f33c04a4a" - } - }, - "205cbd07f34345c48b1e72bc2cb9a93b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1f5b368654494327bfc8d1c315f13832", - "placeholder": "​", - "style": "IPY_MODEL_853eb13a56ec4fbf89e25f333798132e", - "value": "merges.txt: 100%" - } - }, - "a2d328e2313a49aab752cd2ba38220b5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5713250ff3864029a3668c6a7eb1f3e3", - "max": 1146413, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_944ed217567144459ea5279c34f529f3", - "value": 1146413 - } - }, - "a3696eb89c4e434683bb5416d91602db": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1e0faaa1a09f42f8964a3203472f50c2", - "placeholder": "​", - "style": "IPY_MODEL_7c3da939876e4a6f8f2969fbf96bbcd0", - "value": " 1.15M/1.15M [00:00<00:00, 14.1MB/s]" - } - }, - "e04dfc9c2e5f437c8dd9b15f33c04a4a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1f5b368654494327bfc8d1c315f13832": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "853eb13a56ec4fbf89e25f333798132e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5713250ff3864029a3668c6a7eb1f3e3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "944ed217567144459ea5279c34f529f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "1e0faaa1a09f42f8964a3203472f50c2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7c3da939876e4a6f8f2969fbf96bbcd0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "aacafd29b7b5403bb8a7df1ebe2a731e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ec944b4365c34ab6813af9d925e2a552", - "IPY_MODEL_8d48d360da5945bfbf300ae455043c07", - "IPY_MODEL_36f7f5ae075f4c59a44283e25088eaab" - ], - "layout": "IPY_MODEL_8cfa54ddcf354e0e9f71102656a744cf" - } - }, - "ec944b4365c34ab6813af9d925e2a552": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edcb22216cac4bcf83a301d975f20d2d", - "placeholder": "​", - "style": "IPY_MODEL_3ace28614fe446f18268578e56b5ec14", - "value": "tokenizer.json: 100%" - } - }, - "8d48d360da5945bfbf300ae455043c07": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3c8f3c862f744ddba9524079c636124a", - "max": 3529879, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_36eb36dba13d4ad4a73b401e0dc22c42", - "value": 3529879 - } - }, - "36f7f5ae075f4c59a44283e25088eaab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_236bb6124df1443684b16dd34fba2ed4", - "placeholder": "​", - "style": "IPY_MODEL_3264137a43cc4725948aab030421b24c", - "value": " 3.53M/3.53M [00:00<00:00, 22.8MB/s]" - } - }, - "8cfa54ddcf354e0e9f71102656a744cf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "edcb22216cac4bcf83a301d975f20d2d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3ace28614fe446f18268578e56b5ec14": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3c8f3c862f744ddba9524079c636124a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "36eb36dba13d4ad4a73b401e0dc22c42": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "236bb6124df1443684b16dd34fba2ed4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3264137a43cc4725948aab030421b24c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "dd76625672d74095a0f691206646fbd8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_a10c0e99afb546d79fca304e8a8e6ab1", - "IPY_MODEL_6fa7911781ef42949e56c80dc1f85299", - "IPY_MODEL_832e08b2b4524680bca9d71c363f3232" - ], - "layout": "IPY_MODEL_10ec65b4df86458ba2eee2eeccdc91e6" - } - }, - "a10c0e99afb546d79fca304e8a8e6ab1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_764921df2d7b438b8a8ad0d7b68b8b6e", - "placeholder": "​", - "style": "IPY_MODEL_12f7b959b26b472099ea4e06b606772a", - "value": "special_tokens_map.json: 100%" - } - }, - "6fa7911781ef42949e56c80dc1f85299": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_32c6cb10b1e946a89a7b08505c1582e9", - "max": 957, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3ee11622c9a0405abf5f246720d358b2", - "value": 957 - } - }, - "832e08b2b4524680bca9d71c363f3232": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d78ced95d2134e299573c1e8d712b3f8", - "placeholder": "​", - "style": "IPY_MODEL_6ff7b4b691a74f6ab232c26ee55b9982", - "value": " 957/957 [00:00<00:00, 43.5kB/s]" - } - }, - "10ec65b4df86458ba2eee2eeccdc91e6": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "764921df2d7b438b8a8ad0d7b68b8b6e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "12f7b959b26b472099ea4e06b606772a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "32c6cb10b1e946a89a7b08505c1582e9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3ee11622c9a0405abf5f246720d358b2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "d78ced95d2134e299573c1e8d712b3f8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6ff7b4b691a74f6ab232c26ee55b9982": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2cc542103450405a853945ff07471932": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_5807fb12507f4fc0aaa4d083477aba27", - "IPY_MODEL_11d2ce4ace194bb1825fefacf3cc36f0", - "IPY_MODEL_b22724e628384993aa52c255c8bffc6f" - ], - "layout": "IPY_MODEL_e4dc10b2ef9c4a1f99689ebe9d48886a" - } - }, - "5807fb12507f4fc0aaa4d083477aba27": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6b580a221e2f48b5b058c6a6189cc99d", - "placeholder": "​", - "style": "IPY_MODEL_e5f30b22e01c4b74a2f20f3b9880d7ae", - "value": "config.json: 100%" - } - }, - "11d2ce4ace194bb1825fefacf3cc36f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e353ffe187d94729a65453fcd3d8a9a3", - "max": 696, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b056d5ef8b9244828e10f685e38d47bb", - "value": 696 - } - }, - "b22724e628384993aa52c255c8bffc6f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_32ce45ecf0d64c81bea12692ad52ed45", - "placeholder": "​", - "style": "IPY_MODEL_b930858da1a2407ba49a8a8a17a1fc70", - "value": " 696/696 [00:00<00:00, 35.8kB/s]" - } - }, - "e4dc10b2ef9c4a1f99689ebe9d48886a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6b580a221e2f48b5b058c6a6189cc99d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e5f30b22e01c4b74a2f20f3b9880d7ae": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e353ffe187d94729a65453fcd3d8a9a3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b056d5ef8b9244828e10f685e38d47bb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "32ce45ecf0d64c81bea12692ad52ed45": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b930858da1a2407ba49a8a8a17a1fc70": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d366271fc98943aa8fe3da314c1e95dc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_364ab880ac304099b1bb83a92d6a7eed", - "IPY_MODEL_4a8d5b1ca09d493c8b93baf92e7fd5ae", - "IPY_MODEL_c6867a46aa064e26831be8a3a4278905" - ], - "layout": "IPY_MODEL_053fc43d2aad4aeeb114b8fd9aa2aef6" - } - }, - "364ab880ac304099b1bb83a92d6a7eed": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a9583dca84a14e9bb89e58a430c391e3", - "placeholder": "​", - "style": "IPY_MODEL_15b6e4ff685f4f5e8c997e151d2b4007", - "value": "pytorch_model.bin: 100%" - } - }, - "4a8d5b1ca09d493c8b93baf92e7fd5ae": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d73e215040114141bc60b0f58a1f8646", - "max": 506353257, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_18ab1cf13c494b02af122ee0069e3c92", - "value": 506353257 - } - }, - "c6867a46aa064e26831be8a3a4278905": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_839cabb3e69c46549665769afcc24e08", - "placeholder": "​", - "style": "IPY_MODEL_56fe41cf83454378a9345ee4e9a26192", - "value": " 506M/506M [00:09<00:00, 57.3MB/s]" - } - }, - "053fc43d2aad4aeeb114b8fd9aa2aef6": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a9583dca84a14e9bb89e58a430c391e3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "15b6e4ff685f4f5e8c997e151d2b4007": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d73e215040114141bc60b0f58a1f8646": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "18ab1cf13c494b02af122ee0069e3c92": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "839cabb3e69c46549665769afcc24e08": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "56fe41cf83454378a9345ee4e9a26192": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "EWlgdx43A_NL", - "outputId": "a2431529-0c26-4076-a6cf-4c9146c4f9b0" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive\n" - ] - } - ], - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ] - }, - { - "cell_type": "code", - "source": [ - "! pip install faknow\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7dssDha5BGNL", - "outputId": "5d303d54-6c32-482d-83e7-c46a4506cbce" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting faknow\n", - " Downloading faknow-0.0.3-py3-none-any.whl (147 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.6/147.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n", - "Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n", - "Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n", - "Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n", - "Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n", - "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n", - "Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n", - "Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n", - "Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n", - "Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n", - "Collecting sphinx-markdown-tables>=0.0.17 (from faknow)\n", - " Downloading sphinx_markdown_tables-0.0.17-py3-none-any.whl (28 kB)\n", - "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (1.11.4)\n", - "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.3.0)\n", - "Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.5.2)\n", - "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.62.0)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.27.0)\n", - "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n", - "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.20.3)\n", - "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.31.0)\n", - "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (67.7.2)\n", - "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.16.0)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (3.13.1)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.20.3)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (23.2)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (6.0.1)\n", - "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.15.2)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (5.3.3)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (4.9)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (1.3.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (2023.6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (4.10.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2024.2.2)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.5.1)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (3.2.2)\n", - "Installing collected packages: sphinx-markdown-tables, faknow\n", - "Successfully installed faknow-0.0.3 sphinx-markdown-tables-0.0.17\n" - ] - } - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "Zo3_tdxod_tn" - }, - "outputs": [], - "source": [ - "from typing import List, Optional, Tuple\n", - "\n", - "import torch\n", - "from torch import Tensor\n", - "from torch import nn\n", - "from transformers import RobertaModel\n", - "\n", - "from faknow.model.layers.layer import TextCNNLayer\n", - "from faknow.model.model import AbstractModel\n", - "import pandas as pd\n", - "\n", - "\n", - "class _MLP(nn.Module):\n", - " def __init__(self,\n", - " input_dim: int,\n", - " embed_dims: List[int],\n", - " dropout_rate: float,\n", - " output_layer=True):\n", - " super().__init__()\n", - " layers = list()\n", - " for embed_dim in embed_dims:\n", - " layers.append(nn.Linear(input_dim, embed_dim))\n", - " layers.append(nn.BatchNorm1d(embed_dim))\n", - " layers.append(nn.ReLU())\n", - " layers.append(nn.Dropout(p=dropout_rate))\n", - " input_dim = embed_dim\n", - " if output_layer:\n", - " layers.append(torch.nn.Linear(input_dim, 1))\n", - " self.mlp = torch.nn.Sequential(*layers)\n", - "\n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - "\n", - " Args:\n", - " x (Tensor): shared feature from domain and text, shape=(batch_size, embed_dim)\n", - "\n", - " \"\"\"\n", - " return self.mlp(x)\n", - "\n", - "\n", - "class _MaskAttentionLayer(torch.nn.Module):\n", - " \"\"\"\n", - " Compute attention layer\n", - " \"\"\"\n", - " def __init__(self, input_size: int):\n", - " super(_MaskAttentionLayer, self).__init__()\n", - " self.attention_layer = torch.nn.Linear(input_size, 1)\n", - "\n", - " def forward(self,\n", - " inputs: Tensor,\n", - " mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:\n", - " weights = self.attention_layer(inputs).view(-1, inputs.size(1))\n", - " if mask is not None:\n", - " weights = weights.masked_fill(mask == 0, float(\"-inf\"))\n", - " weights = torch.softmax(weights, dim=-1).unsqueeze(1)\n", - " outputs = torch.matmul(weights, inputs).squeeze(1)\n", - " return outputs, weights\n", - "\n", - "\n", - "class MDFEND(AbstractModel):\n", - " r\"\"\"\n", - " MDFEND: Multi-domain Fake News Detection, CIKM 2021\n", - " paper: https://dl.acm.org/doi/10.1145/3459637.3482139\n", - " code: https://github.com/kennqiang/MDFEND-Weibo21\n", - " \"\"\"\n", - " def __init__(self,\n", - " pre_trained_bert_name: str,\n", - " domain_num: int,\n", - " mlp_dims: Optional[List[int]] = None,\n", - " dropout_rate=0.2,\n", - " expert_num=5):\n", - " \"\"\"\n", - "\n", - " Args:\n", - " pre_trained_bert_name (str): the name or local path of pre-trained bert model\n", - " domain_num (int): total number of all domains\n", - " mlp_dims (List[int]): a list of the dimensions in MLP layer, if None, [384] will be taken as default, default=384\n", - " dropout_rate (float): rate of Dropout layer, default=0.2\n", - " expert_num (int): number of experts also called TextCNNLayer, default=5\n", - " \"\"\"\n", - " super(MDFEND, self).__init__()\n", - " self.domain_num = domain_num\n", - " self.expert_num = expert_num\n", - " self.bert = RobertaModel.from_pretrained(\n", - " pre_trained_bert_name).requires_grad_(False)\n", - " self.embedding_size = self.bert.config.hidden_size\n", - " self.loss_func = nn.BCELoss()\n", - " if mlp_dims is None:\n", - " mlp_dims = [384]\n", - "\n", - " filter_num = 64\n", - " filter_sizes = [1, 2, 3, 5, 10]\n", - " experts = [\n", - " TextCNNLayer(self.embedding_size, filter_num, filter_sizes)\n", - " for _ in range(self.expert_num)\n", - " ]\n", - " self.experts = nn.ModuleList(experts)\n", - "\n", - " self.gate = nn.Sequential(\n", - " nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),\n", - " nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))\n", - "\n", - " self.attention = _MaskAttentionLayer(self.embedding_size)\n", - "\n", - " self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,\n", - " embedding_dim=self.embedding_size)\n", - " self.classifier = _MLP(320, mlp_dims, dropout_rate)\n", - "\n", - " def forward(self, token_id: Tensor, mask: Tensor,\n", - " domain: Tensor) -> Tensor:\n", - " \"\"\"\n", - "\n", - " Args:\n", - " token_id (Tensor): token ids from bert tokenizer, shape=(batch_size, max_len)\n", - " mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)\n", - " domain (Tensor): domain id, shape=(batch_size,)\n", - "\n", - " Returns:\n", - " FloatTensor: the prediction of being fake, shape=(batch_size,)\n", - " \"\"\"\n", - " text_embedding = self.bert(token_id,\n", - " attention_mask=mask).last_hidden_state\n", - " attention_feature, _ = self.attention(text_embedding, mask)\n", - "\n", - " domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)\n", - "\n", - " gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)\n", - " gate_output = self.gate(gate_input)\n", - "\n", - " shared_feature = 0\n", - " for i in range(self.expert_num):\n", - " expert_feature = self.experts[i](text_embedding)\n", - " shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))\n", - "\n", - " label_pred = self.classifier(shared_feature)\n", - "\n", - " return torch.sigmoid(label_pred.squeeze(1))\n", - "\n", - " def calculate_loss(self, data) -> Tensor:\n", - " \"\"\"\n", - " calculate loss via BCELoss\n", - "\n", - " Args:\n", - " data (dict): batch data dict\n", - "\n", - " Returns:\n", - " loss (Tensor): loss value\n", - " \"\"\"\n", - "\n", - " token_ids = data['text']['token_id']\n", - " masks = data['text']['mask']\n", - " domains = data['domain']\n", - " labels = data['label']\n", - " output = self.forward(token_ids, masks, domains)\n", - " return self.loss_func(output, labels.float())\n", - "\n", - " def predict(self, data_without_label) -> Tensor:\n", - " \"\"\"\n", - " predict the probability of being fake news\n", - "\n", - " Args:\n", - " data_without_label (Dict[str, Any]): batch data dict\n", - "\n", - " Returns:\n", - " Tensor: one-hot probability, shape=(batch_size, 2)\n", - " \"\"\"\n", - "\n", - " token_ids = data_without_label['text']['token_id']\n", - " masks = data_without_label['text']['mask']\n", - " domains = data_without_label['domain']\n", - "\n", - "\n", - " output_prob = self.forward(token_ids, masks,domains)\n", - "\n", - " return output_prob" - ] - }, - { - "cell_type": "code", - "source": [ - "from faknow.data.dataset.text import TextDataset\n", - "from faknow.data.process.text_process import TokenizerFromPreTrained\n", - "from faknow.evaluate.evaluator import Evaluator\n", - "\n", - "import torch\n", - "from torch.utils.data import DataLoader" - ], - "metadata": { - "id": "Tg2zBjzUBTbt" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "testing_path = \"/content/drive/MyDrive/sinhala-dataset/test_data.json\"\n" - ], - "metadata": { - "id": "Ls-xo82WBbUg" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df = pd.read_json(testing_path)\n", - "df.head()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "by3bnTMCMh6K", - "outputId": "bdc10951-f15e-4918-b7cd-84dd5535b4e5" - }, - "execution_count": 7, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " text domain label\n", - "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 1\n", - "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n", - "2 ඒකි ඒම නෑ බං # jaysays 0 0\n", - "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 1\n", - "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ01
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...01
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "df", - "summary": "{\n \"name\": \"df\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5000,\n \"samples\": [\n \"\\u0d87\\u0dba\\u0dd2 \\u0dba\\u0d9a\\u0ddd \\u0dbd\\u0d82\\u0d9a\\u0dcf\\u0dc0\\u0dda \\u0db6\\u0dd9\\u0dbb\\u0dd2 ! \\u0daf\\u0dd3\\u0db4\\u0dbd\\u0dca\\u0dbd\\u0dcf # \\u0dc0\\u0dd9\\u0dbb\\u0dd2 # \\u0d9c\\u0dbd\\u0dca\\u0db6\\u0ddd\\u0dad\\u0dbd\\u0dca # GenElecSL # SriLanka # TamilNadu # Election2015\",\n \"@USER @USER \\u0d9a\\u0dcf\\u0dbd\\u0d9a\\u0db1\\u0dca\\u0db1\\u0dd2 \\u0d95\\u0d9a\\u0dd4\\u0db1\\u0d9c\\u0dd9 \\u0db4\\u0dd4\\u0d9a \\u0db8\\u0dc4\\u0dbd\\u0dcf \\u0db6\\u0da9\\u0dba\\u0db1\\u0dca\\u0db1 \\u0db6\\u0dd9\\u0dc4\\u0dd9\\u0dad\\u0dca \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0d95\\u0db1\\u0dcf\",\n \"\\u0dc3\\u0dd3\\u0dbd\\u0dcf\\u0dc0\\u0dad\\u0dd4\\u0dbb \\u0db1\\u0dcf / \\u0dc3\\u0dd9\\u0db6\\u0dc5\\u0dd4 \\u0dad\\u0dd2\\u0daf\\u0dd9\\u0db1\\u0d9a\\u0dd4\\u0da7 \\u0db4\\u0dc4\\u0dbb\\u0daf\\u0dd3 \\u0dc3\\u0dd9\\u0db6\\u0dbd\\u0dd9\\u0d9a\\u0dca \\u0dc4\\u0dd2\\u0dbb\\u0d9a\\u0dbb\\u0dba\\u0dd2 \\u0db6\\u0dda\\u0dbb\\u0dcf\\u0d9c\\u0dad\\u0dca\\u0dad\\u0dd9 \\u0db1\\u0dd3\\u0dad\\u0dd2\\u0db8\\u0dba \\u0db4\\u0dd2\\u0dba\\u0dc0\\u0dbb \\u0db1\\u0ddc\\u0d9c\\u0db1\\u0dca\\u0db1 \\u0db4\\u0ddc\\u0dbb\\u0ddc\\u0db1\\u0dca\\u0daf\\u0dd4\\u0dc0 \\u0db8\\u0dad\\u0dba \\u0db1\\u0ddc\\u0daf\\u0d9a\\u0dd2\\u0db1\\u0dca URL via @USER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "code", - "source": [ - "df =df[:100]" - ], - "metadata": { - "id": "LX0T74ZtM9j9" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df[\"label\"] = int(0)" - ], - "metadata": { - "id": "60iL_I8ONCts" - }, - "execution_count": 9, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df.head()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "SE4yeguZNIo-", - "outputId": "110eb559-0dd0-4f2e-cb1c-694100365a31" - }, - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " text domain label\n", - "0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 0\n", - "1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n", - "2 ඒකි ඒම නෑ බං # jaysays 0 0\n", - "3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 0\n", - "4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
textdomainlabel
0@USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ00
1@USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක...00
2ඒකි ඒම නෑ බං # jaysays00
3@USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා...00
4කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර...00
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "df", - "summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"\\u0d89\\u0dc3\\u0dca\\u0dc3\\u0dbb \\u0d8b\\u0db6 \\u0dc0\\u0dbd\\u0dd2\\u0dba\\u0d9a\\u0dca \\u0db1\\u0db8\\u0dca \\u0dbd\\u0ddc\\u0dc0\\u0dd9\\u0dad\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0ddc \\u0d9c\\u0dd2\\u0dc4\\u0dcf\\u0db1\\u0dca \\u0d85\\u0dba\\u0dd2\\u0dba\\u0dcf \\u0d89\\u0d9a\\u0dca\\u0db8\\u0db1\\u0da7 \\u0dc0\\u0dbb\\u0dd9\\u0db1\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0dbd\\u0dcf \\u0d85\\u0da9 \\u0d9c\\u0dc4\\u0dbd\\u0dcf \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0dca\\u0db1\\u0dda \\u0dad\\u0ddc \\u0daf\\u0dd0\\u0db1\\u0dca \\u0dc0\\u0dd9\\u0db1\\u0dc3\\u0dca \\u0d9a\\u0db8\\u0dca \\u0d9a\\u0dbb\\u0db1\\u0dc0\\u0dcf \\u0db8\\u0db1\\u0dca \\u0dad\\u0ddc\\u0da7 \\u0dc0\\u0ddb\\u0dbb\\u0dba\",\n \"\\u0d85\\u0db1\\u0dd4\\u0dbb\\u0d9c\\u0dd9 \\u0dc3\\u0da7\\u0dca\\u0da7\\u0dd0\\u0db9\\u0dd2\\u0dba\\u0db1\\u0dca\\u0d9c\\u0dd9 \\u0d89\\u0dad\\u0dca\\u0dad\\u0dd1\\u0dc0\\u0ddc \\u0daf\\u0dd4\\u0d91\\u0d85\\u0db1 \\u0d91\\u0d9a \\u0d85\\u0dc4\\u0db1\\u0dca\\u0db1\\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca\\u0daf \\u0dad\\u0dc0 \\u0db6\\u0dd0\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0da7\\u0dca\\u0d9c\\u0dc0 \\u0db8\\u0da9\\u0d9c\\u0dc4\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0d8b\\u0db9\\u0dbd\\u0d9c\\u0dd9 \\u0dc3\\u0d9a\\u0dca\\u0d9a\\u0dd2\\u0dbd\\u0dd2 \\u0db4\\u0dbb \\u0d9c\\u0dad\\u0dd2 \\u0dbd\\u0ddd\\u0d9a\\u0dd9\\u0da7\\u0db8 \\u0db4\\u0dd9\\u0db1\\u0dca\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd . % \\u0da7 \\u0d87\\u0daf\\u0dbd \\u0daf\\u0dd0\\u0db8\\u0dca\\u0db8\\u0dd9\\u0dad\\u0dca \\u0db8\\u0dda\\u0d9a\\u0dd9 \\u0d87\\u0db8\\u0db1\\u0dd9\\u0db1 \\u0dc3\\u0dd2\\u0db4\\u0dca\\u0db4\\u0dd2\\u0d9a\\u0da7\\u0dd4\",\n \"\\u0db8\\u0dbd\\u0dca\\u0dbd\\u0dd2\\u0d9c\\u0dd9 \\u0d8b\\u0db4\\u0db1\\u0dca\\u0daf\\u0dd2\\u0db1\\u0dda\\u0da7 \\u0dc4\\u0dd9\\u0da7 \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca \\u0d9a\\u0dbd\\u0dca\\u0db4\\u0db1\\u0dcf \\u0d9a\\u0dbb \\u0d9a\\u0dbb \\u0d89\\u0daf\\u0dca\\u0daf\\u0dd2 \\u0dbd\\u0ddc\\u0d9a\\u0dca\\u0d9a\\u0dcf \\u0daf\\u0dd4\\u0db1\\u0dca\\u0db1 \\u0dc3\\u0db4\\u0dca\\u200d\\u0dbb\\u0dcf\\u0dba\\u0dd2\\u0dc3\\u0dca \\u0d91\\u0d9a \\u0db1\\u0db8\\u0dca \\u0db4\\u0da7\\u0dca\\u0da7 . . \\u0dbb\\u0dad\\u0dd2\\u0da4\\u0dca\\u0da4\\u0dcf \\u0db4\\u0dd9\\u0da7\\u0dca\\u0da7\\u0dd2\\u0dba\\u0d9a\\u0dd2\\u0db1\\u0dca \\u0dc2\\u0dda\\u0db4\\u0dca \\u0d8b\\u0db1\\u0dd1\\u0d9a\\u0dd2 \\u0dc4\\u0dd0\\u0db8\\u0dd2\\u0db1\\u0dda\\u0dc2\\u0db1\\u0dca ourNation HappyBirthday\\u0d92\\u0d85\\u0dc3\\u0dca\\u0dc3\\u0dda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(len(df))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zTRfsZ_tNLif", - "outputId": "d0012de3-5298-4be5-b280-dee66208a034" - }, - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "100\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "path = '/content/drive/MyDrive/sinhala-dataset'\n", - "testing_json = \"/testing.json\"" - ], - "metadata": { - "id": "weZ2_xujNW1b" - }, - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df.to_json(path + testing_json, orient='records')\n" - ], - "metadata": { - "id": "HzAfca0LNUDx" - }, - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 400, - "referenced_widgets": [ - "1edd00396f2d45a7b32079d43bc62634", - "a96994a464df43918566f6cc967e7148", - "92143f1854c44349a3d0f6b7838b6a5c", - "b90793e5e29c435cab6fd7b1e059c992", - "8ce96d68c1e443b28e1200f106fefb02", - "dcfa2646664e449c98a00e89b2b7984d", - "28bcfe01e8a64ba08ce62e9715ad85e4", - "1b72e7f7e85a49fb8c7a79bce1989647", - "76523fc98b644aaaaf6c605544e9fffb", - "b2d2a9eabbe14cddaa7d0aa39e7a1953", - "bf4db198f72441b48a5dbff8515a1f91", - "48e8e488c27a4948a455835f6caf2ce2", - "2fb5a8ac30ed49df93056bc6802e8ee0", - "c29fcea40de347bf9f274f375b9123a5", - "00cd02215e1f4225a4cf93b46b9a7e15", - "e1064a6f8bfd4435a6ad15d08ff44699", - "956884e6e8bc43f4bc51a2b75c131889", - "4e23a4eec35f4f8ea3114d9cb0ea1e04", - "e7a3964adec34bf6b37f52cf1119fa9c", - "22a128f583aa4514a1e71d0f8aaf8e79", - "3f902b8cb652446c84609cd730a64e35", - "a78b7664a1e346f181b203bb1645eb9b", - "3bf7edeed06a4ef3b1ce28f24201c84b", - "205cbd07f34345c48b1e72bc2cb9a93b", - "a2d328e2313a49aab752cd2ba38220b5", - "a3696eb89c4e434683bb5416d91602db", - "e04dfc9c2e5f437c8dd9b15f33c04a4a", - "1f5b368654494327bfc8d1c315f13832", - "853eb13a56ec4fbf89e25f333798132e", - "5713250ff3864029a3668c6a7eb1f3e3", - "944ed217567144459ea5279c34f529f3", - "1e0faaa1a09f42f8964a3203472f50c2", - "7c3da939876e4a6f8f2969fbf96bbcd0", - "aacafd29b7b5403bb8a7df1ebe2a731e", - "ec944b4365c34ab6813af9d925e2a552", - "8d48d360da5945bfbf300ae455043c07", - "36f7f5ae075f4c59a44283e25088eaab", - "8cfa54ddcf354e0e9f71102656a744cf", - "edcb22216cac4bcf83a301d975f20d2d", - "3ace28614fe446f18268578e56b5ec14", - "3c8f3c862f744ddba9524079c636124a", - "36eb36dba13d4ad4a73b401e0dc22c42", - "236bb6124df1443684b16dd34fba2ed4", - "3264137a43cc4725948aab030421b24c", - "dd76625672d74095a0f691206646fbd8", - "a10c0e99afb546d79fca304e8a8e6ab1", - "6fa7911781ef42949e56c80dc1f85299", - "832e08b2b4524680bca9d71c363f3232", - "10ec65b4df86458ba2eee2eeccdc91e6", - "764921df2d7b438b8a8ad0d7b68b8b6e", - "12f7b959b26b472099ea4e06b606772a", - "32c6cb10b1e946a89a7b08505c1582e9", - "3ee11622c9a0405abf5f246720d358b2", - "d78ced95d2134e299573c1e8d712b3f8", - "6ff7b4b691a74f6ab232c26ee55b9982", - "2cc542103450405a853945ff07471932", - "5807fb12507f4fc0aaa4d083477aba27", - "11d2ce4ace194bb1825fefacf3cc36f0", - "b22724e628384993aa52c255c8bffc6f", - "e4dc10b2ef9c4a1f99689ebe9d48886a", - "6b580a221e2f48b5b058c6a6189cc99d", - "e5f30b22e01c4b74a2f20f3b9880d7ae", - "e353ffe187d94729a65453fcd3d8a9a3", - "b056d5ef8b9244828e10f685e38d47bb", - "32ce45ecf0d64c81bea12692ad52ed45", - "b930858da1a2407ba49a8a8a17a1fc70", - "d366271fc98943aa8fe3da314c1e95dc", - "364ab880ac304099b1bb83a92d6a7eed", - "4a8d5b1ca09d493c8b93baf92e7fd5ae", - "c6867a46aa064e26831be8a3a4278905", - "053fc43d2aad4aeeb114b8fd9aa2aef6", - "a9583dca84a14e9bb89e58a430c391e3", - "15b6e4ff685f4f5e8c997e151d2b4007", - "d73e215040114141bc60b0f58a1f8646", - "18ab1cf13c494b02af122ee0069e3c92", - "839cabb3e69c46549665769afcc24e08", - "56fe41cf83454378a9345ee4e9a26192" - ] - }, - "id": "ROUE4LV1d_tp", - "outputId": "7f3d865e-97c6-434e-a8ae-f69a3462586a" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:80: UserWarning: \n", - "Access to the secret `HF_TOKEN` has not been granted on this notebook.\n", - "You will not be requested again.\n", - "Please restart the session if you want to be prompted again.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/1.38k [00:00" - ] - }, - "metadata": {}, - "execution_count": 14 - } - ], - "source": [ - "MODEL_SAVE_PATH = \"/content/drive/MyDrive/models-path-improvement/last-epoch-model-2024-03-08-15_34_03_6.pth\"\n", - "\n", - "max_len, bert = 160 , 'sinhala-nlp/sinbert-sold-si'\n", - "tokenizer = TokenizerFromPreTrained(max_len, bert)\n", - "\n", - "# dataset\n", - "batch_size = 100\n", - "\n", - "\n", - "testing_path = path + testing_json\n", - "\n", - "testing_set = TextDataset(testing_path, ['text'], tokenizer)\n", - "testing_loader = DataLoader(testing_set, batch_size, shuffle=False)\n", - "\n", - "# prepare model\n", - "domain_num = 3\n", - "\n", - "model = MDFEND(bert, domain_num , expert_num=18 , mlp_dims = [5080 ,4020, 3010, 2024 ,1012 ,606 , 400])\n", - "model.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=torch.device('cpu')))\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "source": [ - "outputs = []\n", - "for batch_data in testing_loader:\n", - " outputs.append(model.predict(batch_data))" - ], - "metadata": { - "id": "nsTmmtm7ENK7" - }, - "execution_count": 15, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "outputs" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MgJFRW6uOTNK", - "outputId": "37176cb8-b2e6-4c3b-c852-c4e3a17cda30" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[tensor([1.3248e-03, 2.0616e-01, 4.5341e-02, 9.3156e-01, 2.3167e-01, 9.9967e-01,\n", - " 6.8980e-02, 8.8265e-01, 4.6962e-01, 1.4711e-01, 3.9079e-01, 1.5254e-02,\n", - " 1.4336e-01, 9.9974e-01, 9.4320e-02, 9.6368e-01, 3.0400e-01, 1.1099e-02,\n", - " 8.6662e-01, 9.0376e-02, 4.0686e-01, 9.9839e-01, 9.9700e-01, 4.9826e-02,\n", - " 9.6036e-01, 3.1445e-02, 7.8756e-01, 5.2800e-01, 9.4090e-01, 9.9148e-01,\n", - " 9.9725e-01, 1.6041e-02, 2.9223e-01, 1.5572e-01, 7.2350e-02, 8.2344e-02,\n", - " 5.4701e-03, 7.9817e-01, 1.6082e-03, 2.3789e-01, 2.0766e-02, 9.8514e-01,\n", - " 1.4062e-02, 9.8410e-01, 5.0685e-01, 1.0039e-01, 3.5957e-01, 4.6990e-01,\n", - " 6.0348e-01, 5.4888e-01, 9.7326e-02, 1.4647e-03, 2.0198e-02, 9.9995e-01,\n", - " 8.6098e-01, 7.3051e-01, 3.0538e-03, 9.9967e-01, 5.4075e-03, 2.4586e-02,\n", - " 2.1326e-01, 9.9988e-01, 7.7565e-01, 7.3468e-01, 8.2214e-02, 8.3052e-03,\n", - " 3.7278e-01, 3.6124e-01, 2.4839e-01, 2.4560e-01, 3.9281e-02, 9.9611e-01,\n", - " 2.3351e-02, 1.9584e-01, 1.1381e-01, 2.4559e-01, 6.5344e-01, 3.5736e-01,\n", - " 8.6219e-04, 3.8071e-01, 5.6490e-01, 2.5499e-02, 6.1897e-02, 9.0802e-01,\n", - " 8.2842e-02, 8.5183e-04, 6.4453e-01, 6.0612e-01, 3.8544e-01, 4.0832e-02,\n", - " 6.0973e-01, 4.7808e-02, 7.0927e-01, 8.4603e-01, 5.6889e-01, 5.3337e-01,\n", - " 1.2113e-01, 8.6022e-01, 8.5642e-03, 9.9990e-01],\n", - " grad_fn=)]" - ] - }, - "metadata": {}, - "execution_count": 16 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# 1 ====> offensive\n", - "# 0 ====> not offensive\n", - "label = []\n", - "for output in outputs:\n", - " for out in output:\n", - " output_prob = out.item()\n", - " if output_prob >= 0.5:\n", - " label.append(1)\n", - " else:\n", - " label.append(0)\n" - ], - "metadata": { - "id": "ySdut6vMOvQY" - }, - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "label" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "8thgk3ykPTOD", - "outputId": "fb05dd91-01d4-44a8-ef66-caf0eda24831" - }, - "execution_count": 21, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 1,\n", - " 0,\n", - " 1,\n", - " 0,\n", - " 1]" - ] - }, - "metadata": {}, - "execution_count": 21 - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "P4eIsw1DPYQG" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file