{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"1edd00396f2d45a7b32079d43bc62634": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a96994a464df43918566f6cc967e7148",
"IPY_MODEL_92143f1854c44349a3d0f6b7838b6a5c",
"IPY_MODEL_b90793e5e29c435cab6fd7b1e059c992"
],
"layout": "IPY_MODEL_8ce96d68c1e443b28e1200f106fefb02"
}
},
"a96994a464df43918566f6cc967e7148": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dcfa2646664e449c98a00e89b2b7984d",
"placeholder": "",
"style": "IPY_MODEL_28bcfe01e8a64ba08ce62e9715ad85e4",
"value": "tokenizer_config.json: 100%"
}
},
"92143f1854c44349a3d0f6b7838b6a5c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1b72e7f7e85a49fb8c7a79bce1989647",
"max": 1375,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_76523fc98b644aaaaf6c605544e9fffb",
"value": 1375
}
},
"b90793e5e29c435cab6fd7b1e059c992": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b2d2a9eabbe14cddaa7d0aa39e7a1953",
"placeholder": "",
"style": "IPY_MODEL_bf4db198f72441b48a5dbff8515a1f91",
"value": " 1.38k/1.38k [00:00<00:00, 36.2kB/s]"
}
},
"8ce96d68c1e443b28e1200f106fefb02": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"dcfa2646664e449c98a00e89b2b7984d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"28bcfe01e8a64ba08ce62e9715ad85e4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1b72e7f7e85a49fb8c7a79bce1989647": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"76523fc98b644aaaaf6c605544e9fffb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b2d2a9eabbe14cddaa7d0aa39e7a1953": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"bf4db198f72441b48a5dbff8515a1f91": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"48e8e488c27a4948a455835f6caf2ce2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2fb5a8ac30ed49df93056bc6802e8ee0",
"IPY_MODEL_c29fcea40de347bf9f274f375b9123a5",
"IPY_MODEL_00cd02215e1f4225a4cf93b46b9a7e15"
],
"layout": "IPY_MODEL_e1064a6f8bfd4435a6ad15d08ff44699"
}
},
"2fb5a8ac30ed49df93056bc6802e8ee0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_956884e6e8bc43f4bc51a2b75c131889",
"placeholder": "",
"style": "IPY_MODEL_4e23a4eec35f4f8ea3114d9cb0ea1e04",
"value": "vocab.json: 100%"
}
},
"c29fcea40de347bf9f274f375b9123a5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e7a3964adec34bf6b37f52cf1119fa9c",
"max": 1500217,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_22a128f583aa4514a1e71d0f8aaf8e79",
"value": 1500217
}
},
"00cd02215e1f4225a4cf93b46b9a7e15": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3f902b8cb652446c84609cd730a64e35",
"placeholder": "",
"style": "IPY_MODEL_a78b7664a1e346f181b203bb1645eb9b",
"value": " 1.50M/1.50M [00:00<00:00, 9.80MB/s]"
}
},
"e1064a6f8bfd4435a6ad15d08ff44699": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"956884e6e8bc43f4bc51a2b75c131889": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4e23a4eec35f4f8ea3114d9cb0ea1e04": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e7a3964adec34bf6b37f52cf1119fa9c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"22a128f583aa4514a1e71d0f8aaf8e79": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"3f902b8cb652446c84609cd730a64e35": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a78b7664a1e346f181b203bb1645eb9b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3bf7edeed06a4ef3b1ce28f24201c84b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_205cbd07f34345c48b1e72bc2cb9a93b",
"IPY_MODEL_a2d328e2313a49aab752cd2ba38220b5",
"IPY_MODEL_a3696eb89c4e434683bb5416d91602db"
],
"layout": "IPY_MODEL_e04dfc9c2e5f437c8dd9b15f33c04a4a"
}
},
"205cbd07f34345c48b1e72bc2cb9a93b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1f5b368654494327bfc8d1c315f13832",
"placeholder": "",
"style": "IPY_MODEL_853eb13a56ec4fbf89e25f333798132e",
"value": "merges.txt: 100%"
}
},
"a2d328e2313a49aab752cd2ba38220b5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5713250ff3864029a3668c6a7eb1f3e3",
"max": 1146413,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_944ed217567144459ea5279c34f529f3",
"value": 1146413
}
},
"a3696eb89c4e434683bb5416d91602db": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1e0faaa1a09f42f8964a3203472f50c2",
"placeholder": "",
"style": "IPY_MODEL_7c3da939876e4a6f8f2969fbf96bbcd0",
"value": " 1.15M/1.15M [00:00<00:00, 14.1MB/s]"
}
},
"e04dfc9c2e5f437c8dd9b15f33c04a4a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1f5b368654494327bfc8d1c315f13832": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"853eb13a56ec4fbf89e25f333798132e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"5713250ff3864029a3668c6a7eb1f3e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"944ed217567144459ea5279c34f529f3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"1e0faaa1a09f42f8964a3203472f50c2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7c3da939876e4a6f8f2969fbf96bbcd0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"aacafd29b7b5403bb8a7df1ebe2a731e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ec944b4365c34ab6813af9d925e2a552",
"IPY_MODEL_8d48d360da5945bfbf300ae455043c07",
"IPY_MODEL_36f7f5ae075f4c59a44283e25088eaab"
],
"layout": "IPY_MODEL_8cfa54ddcf354e0e9f71102656a744cf"
}
},
"ec944b4365c34ab6813af9d925e2a552": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_edcb22216cac4bcf83a301d975f20d2d",
"placeholder": "",
"style": "IPY_MODEL_3ace28614fe446f18268578e56b5ec14",
"value": "tokenizer.json: 100%"
}
},
"8d48d360da5945bfbf300ae455043c07": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3c8f3c862f744ddba9524079c636124a",
"max": 3529879,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_36eb36dba13d4ad4a73b401e0dc22c42",
"value": 3529879
}
},
"36f7f5ae075f4c59a44283e25088eaab": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_236bb6124df1443684b16dd34fba2ed4",
"placeholder": "",
"style": "IPY_MODEL_3264137a43cc4725948aab030421b24c",
"value": " 3.53M/3.53M [00:00<00:00, 22.8MB/s]"
}
},
"8cfa54ddcf354e0e9f71102656a744cf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"edcb22216cac4bcf83a301d975f20d2d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3ace28614fe446f18268578e56b5ec14": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3c8f3c862f744ddba9524079c636124a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"36eb36dba13d4ad4a73b401e0dc22c42": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"236bb6124df1443684b16dd34fba2ed4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3264137a43cc4725948aab030421b24c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"dd76625672d74095a0f691206646fbd8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a10c0e99afb546d79fca304e8a8e6ab1",
"IPY_MODEL_6fa7911781ef42949e56c80dc1f85299",
"IPY_MODEL_832e08b2b4524680bca9d71c363f3232"
],
"layout": "IPY_MODEL_10ec65b4df86458ba2eee2eeccdc91e6"
}
},
"a10c0e99afb546d79fca304e8a8e6ab1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_764921df2d7b438b8a8ad0d7b68b8b6e",
"placeholder": "",
"style": "IPY_MODEL_12f7b959b26b472099ea4e06b606772a",
"value": "special_tokens_map.json: 100%"
}
},
"6fa7911781ef42949e56c80dc1f85299": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_32c6cb10b1e946a89a7b08505c1582e9",
"max": 957,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_3ee11622c9a0405abf5f246720d358b2",
"value": 957
}
},
"832e08b2b4524680bca9d71c363f3232": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d78ced95d2134e299573c1e8d712b3f8",
"placeholder": "",
"style": "IPY_MODEL_6ff7b4b691a74f6ab232c26ee55b9982",
"value": " 957/957 [00:00<00:00, 43.5kB/s]"
}
},
"10ec65b4df86458ba2eee2eeccdc91e6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"764921df2d7b438b8a8ad0d7b68b8b6e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"12f7b959b26b472099ea4e06b606772a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"32c6cb10b1e946a89a7b08505c1582e9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3ee11622c9a0405abf5f246720d358b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"d78ced95d2134e299573c1e8d712b3f8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6ff7b4b691a74f6ab232c26ee55b9982": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2cc542103450405a853945ff07471932": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_5807fb12507f4fc0aaa4d083477aba27",
"IPY_MODEL_11d2ce4ace194bb1825fefacf3cc36f0",
"IPY_MODEL_b22724e628384993aa52c255c8bffc6f"
],
"layout": "IPY_MODEL_e4dc10b2ef9c4a1f99689ebe9d48886a"
}
},
"5807fb12507f4fc0aaa4d083477aba27": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6b580a221e2f48b5b058c6a6189cc99d",
"placeholder": "",
"style": "IPY_MODEL_e5f30b22e01c4b74a2f20f3b9880d7ae",
"value": "config.json: 100%"
}
},
"11d2ce4ace194bb1825fefacf3cc36f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e353ffe187d94729a65453fcd3d8a9a3",
"max": 696,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b056d5ef8b9244828e10f685e38d47bb",
"value": 696
}
},
"b22724e628384993aa52c255c8bffc6f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_32ce45ecf0d64c81bea12692ad52ed45",
"placeholder": "",
"style": "IPY_MODEL_b930858da1a2407ba49a8a8a17a1fc70",
"value": " 696/696 [00:00<00:00, 35.8kB/s]"
}
},
"e4dc10b2ef9c4a1f99689ebe9d48886a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6b580a221e2f48b5b058c6a6189cc99d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e5f30b22e01c4b74a2f20f3b9880d7ae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e353ffe187d94729a65453fcd3d8a9a3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b056d5ef8b9244828e10f685e38d47bb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"32ce45ecf0d64c81bea12692ad52ed45": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b930858da1a2407ba49a8a8a17a1fc70": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d366271fc98943aa8fe3da314c1e95dc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_364ab880ac304099b1bb83a92d6a7eed",
"IPY_MODEL_4a8d5b1ca09d493c8b93baf92e7fd5ae",
"IPY_MODEL_c6867a46aa064e26831be8a3a4278905"
],
"layout": "IPY_MODEL_053fc43d2aad4aeeb114b8fd9aa2aef6"
}
},
"364ab880ac304099b1bb83a92d6a7eed": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a9583dca84a14e9bb89e58a430c391e3",
"placeholder": "",
"style": "IPY_MODEL_15b6e4ff685f4f5e8c997e151d2b4007",
"value": "pytorch_model.bin: 100%"
}
},
"4a8d5b1ca09d493c8b93baf92e7fd5ae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d73e215040114141bc60b0f58a1f8646",
"max": 506353257,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_18ab1cf13c494b02af122ee0069e3c92",
"value": 506353257
}
},
"c6867a46aa064e26831be8a3a4278905": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_839cabb3e69c46549665769afcc24e08",
"placeholder": "",
"style": "IPY_MODEL_56fe41cf83454378a9345ee4e9a26192",
"value": " 506M/506M [00:09<00:00, 57.3MB/s]"
}
},
"053fc43d2aad4aeeb114b8fd9aa2aef6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a9583dca84a14e9bb89e58a430c391e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"15b6e4ff685f4f5e8c997e151d2b4007": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d73e215040114141bc60b0f58a1f8646": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"18ab1cf13c494b02af122ee0069e3c92": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"839cabb3e69c46549665769afcc24e08": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"56fe41cf83454378a9345ee4e9a26192": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "EWlgdx43A_NL",
"outputId": "a2431529-0c26-4076-a6cf-4c9146c4f9b0"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"# Colab-only: attach Google Drive to the runtime's filesystem.\n",
"from google.colab import drive\n",
"\n",
"DRIVE_MOUNT_POINT = '/content/drive'\n",
"drive.mount(DRIVE_MOUNT_POINT)"
]
},
{
"cell_type": "code",
"source": [
"# Install into the running kernel's environment (%pip, not !pip) and pin the\n",
"# release this notebook was executed against so a fresh run is reproducible.\n",
"%pip install faknow==0.0.3\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7dssDha5BGNL",
"outputId": "5d303d54-6c32-482d-83e7-c46a4506cbce"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting faknow\n",
" Downloading faknow-0.0.3-py3-none-any.whl (147 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.6/147.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n",
"Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n",
"Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n",
"Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n",
"Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n",
"Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n",
"Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n",
"Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n",
"Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n",
"Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n",
"Collecting sphinx-markdown-tables>=0.0.17 (from faknow)\n",
" Downloading sphinx_markdown_tables-0.0.17-py3-none-any.whl (28 kB)\n",
"Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (1.11.4)\n",
"Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n",
"Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.3.0)\n",
"Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.5.2)\n",
"Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n",
"Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.62.0)\n",
"Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.27.0)\n",
"Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n",
"Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.20.3)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (2.31.0)\n",
"Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (67.7.2)\n",
"Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.16.0)\n",
"Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n",
"Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (3.13.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.20.3)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (23.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (6.0.1)\n",
"Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.15.2)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n",
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (5.3.3)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.3.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (4.9)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (1.3.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (2023.6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers>=4.26.1->faknow) (4.10.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.10.0->faknow) (2024.2.2)\n",
"Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n",
"Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.10.0->faknow) (0.5.1)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.10.0->faknow) (3.2.2)\n",
"Installing collected packages: sphinx-markdown-tables, faknow\n",
"Successfully installed faknow-0.0.3 sphinx-markdown-tables-0.0.17\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "Zo3_tdxod_tn"
},
"outputs": [],
"source": [
"from typing import List, Optional, Tuple\n",
"\n",
"import torch\n",
"from torch import Tensor\n",
"from torch import nn\n",
"from transformers import RobertaModel\n",
"\n",
"from faknow.model.layers.layer import TextCNNLayer\n",
"from faknow.model.model import AbstractModel\n",
"import pandas as pd\n",
"\n",
"\n",
"class _MLP(nn.Module):\n",
" def __init__(self,\n",
" input_dim: int,\n",
" embed_dims: List[int],\n",
" dropout_rate: float,\n",
" output_layer=True):\n",
" super().__init__()\n",
" layers = list()\n",
" for embed_dim in embed_dims:\n",
" layers.append(nn.Linear(input_dim, embed_dim))\n",
" layers.append(nn.BatchNorm1d(embed_dim))\n",
" layers.append(nn.ReLU())\n",
" layers.append(nn.Dropout(p=dropout_rate))\n",
" input_dim = embed_dim\n",
" if output_layer:\n",
" layers.append(torch.nn.Linear(input_dim, 1))\n",
" self.mlp = torch.nn.Sequential(*layers)\n",
"\n",
" def forward(self, x: Tensor) -> Tensor:\n",
" \"\"\"\n",
"\n",
" Args:\n",
" x (Tensor): shared feature from domain and text, shape=(batch_size, embed_dim)\n",
"\n",
" \"\"\"\n",
" return self.mlp(x)\n",
"\n",
"\n",
"class _MaskAttentionLayer(torch.nn.Module):\n",
" \"\"\"\n",
" Compute attention layer\n",
" \"\"\"\n",
" def __init__(self, input_size: int):\n",
" super(_MaskAttentionLayer, self).__init__()\n",
" self.attention_layer = torch.nn.Linear(input_size, 1)\n",
"\n",
" def forward(self,\n",
" inputs: Tensor,\n",
" mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:\n",
" weights = self.attention_layer(inputs).view(-1, inputs.size(1))\n",
" if mask is not None:\n",
" weights = weights.masked_fill(mask == 0, float(\"-inf\"))\n",
" weights = torch.softmax(weights, dim=-1).unsqueeze(1)\n",
" outputs = torch.matmul(weights, inputs).squeeze(1)\n",
" return outputs, weights\n",
"\n",
"\n",
"class MDFEND(AbstractModel):\n",
" r\"\"\"\n",
" MDFEND: Multi-domain Fake News Detection, CIKM 2021\n",
" paper: https://dl.acm.org/doi/10.1145/3459637.3482139\n",
" code: https://github.com/kennqiang/MDFEND-Weibo21\n",
" \"\"\"\n",
" def __init__(self,\n",
" pre_trained_bert_name: str,\n",
" domain_num: int,\n",
" mlp_dims: Optional[List[int]] = None,\n",
" dropout_rate=0.2,\n",
" expert_num=5):\n",
" \"\"\"\n",
"\n",
" Args:\n",
" pre_trained_bert_name (str): the name or local path of pre-trained bert model\n",
" domain_num (int): total number of all domains\n",
" mlp_dims (List[int]): a list of the dimensions in MLP layer, if None, [384] will be taken as default, default=384\n",
" dropout_rate (float): rate of Dropout layer, default=0.2\n",
" expert_num (int): number of experts also called TextCNNLayer, default=5\n",
" \"\"\"\n",
" super(MDFEND, self).__init__()\n",
" self.domain_num = domain_num\n",
" self.expert_num = expert_num\n",
" self.bert = RobertaModel.from_pretrained(\n",
" pre_trained_bert_name).requires_grad_(False)\n",
" self.embedding_size = self.bert.config.hidden_size\n",
" self.loss_func = nn.BCELoss()\n",
" if mlp_dims is None:\n",
" mlp_dims = [384]\n",
"\n",
" filter_num = 64\n",
" filter_sizes = [1, 2, 3, 5, 10]\n",
" experts = [\n",
" TextCNNLayer(self.embedding_size, filter_num, filter_sizes)\n",
" for _ in range(self.expert_num)\n",
" ]\n",
" self.experts = nn.ModuleList(experts)\n",
"\n",
" self.gate = nn.Sequential(\n",
" nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),\n",
" nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))\n",
"\n",
" self.attention = _MaskAttentionLayer(self.embedding_size)\n",
"\n",
" self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,\n",
" embedding_dim=self.embedding_size)\n",
" self.classifier = _MLP(320, mlp_dims, dropout_rate)\n",
"\n",
" def forward(self, token_id: Tensor, mask: Tensor,\n",
" domain: Tensor) -> Tensor:\n",
" \"\"\"\n",
"\n",
" Args:\n",
" token_id (Tensor): token ids from bert tokenizer, shape=(batch_size, max_len)\n",
" mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)\n",
" domain (Tensor): domain id, shape=(batch_size,)\n",
"\n",
" Returns:\n",
" FloatTensor: the prediction of being fake, shape=(batch_size,)\n",
" \"\"\"\n",
" text_embedding = self.bert(token_id,\n",
" attention_mask=mask).last_hidden_state\n",
" attention_feature, _ = self.attention(text_embedding, mask)\n",
"\n",
" domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)\n",
"\n",
" gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)\n",
" gate_output = self.gate(gate_input)\n",
"\n",
" shared_feature = 0\n",
" for i in range(self.expert_num):\n",
" expert_feature = self.experts[i](text_embedding)\n",
" shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))\n",
"\n",
" label_pred = self.classifier(shared_feature)\n",
"\n",
" return torch.sigmoid(label_pred.squeeze(1))\n",
"\n",
" def calculate_loss(self, data) -> Tensor:\n",
" \"\"\"\n",
" calculate loss via BCELoss\n",
"\n",
" Args:\n",
" data (dict): batch data dict\n",
"\n",
" Returns:\n",
" loss (Tensor): loss value\n",
" \"\"\"\n",
"\n",
" token_ids = data['text']['token_id']\n",
" masks = data['text']['mask']\n",
" domains = data['domain']\n",
" labels = data['label']\n",
" output = self.forward(token_ids, masks, domains)\n",
" return self.loss_func(output, labels.float())\n",
"\n",
" def predict(self, data_without_label) -> Tensor:\n",
" \"\"\"\n",
" predict the probability of being fake news\n",
"\n",
" Args:\n",
" data_without_label (Dict[str, Any]): batch data dict\n",
"\n",
" Returns:\n",
" Tensor: one-hot probability, shape=(batch_size, 2)\n",
" \"\"\"\n",
"\n",
" token_ids = data_without_label['text']['token_id']\n",
" masks = data_without_label['text']['mask']\n",
" domains = data_without_label['domain']\n",
"\n",
"\n",
" output_prob = self.forward(token_ids, masks,domains)\n",
"\n",
" return output_prob"
]
},
{
"cell_type": "code",
"source": [
"from faknow.data.dataset.text import TextDataset\n",
"from faknow.data.process.text_process import TokenizerFromPreTrained\n",
"from faknow.evaluate.evaluator import Evaluator\n",
"\n",
"import torch\n",
"from torch.utils.data import DataLoader"
],
"metadata": {
"id": "Tg2zBjzUBTbt"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"testing_path = \"/content/drive/MyDrive/sinhala-dataset/test_data.json\"\n"
],
"metadata": {
"id": "Ls-xo82WBbUg"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df = pd.read_json(testing_path)\n",
"df.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "by3bnTMCMh6K",
"outputId": "bdc10951-f15e-4918-b7cd-84dd5535b4e5"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" text domain label\n",
"0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 1\n",
"1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n",
"2 ඒකි ඒම නෑ බං # jaysays 0 0\n",
"3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 1\n",
"4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0"
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" text | \n",
" domain | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" ඒකි ඒම නෑ බං # jaysays | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 5000,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5000,\n \"samples\": [\n \"\\u0d87\\u0dba\\u0dd2 \\u0dba\\u0d9a\\u0ddd \\u0dbd\\u0d82\\u0d9a\\u0dcf\\u0dc0\\u0dda \\u0db6\\u0dd9\\u0dbb\\u0dd2 ! \\u0daf\\u0dd3\\u0db4\\u0dbd\\u0dca\\u0dbd\\u0dcf # \\u0dc0\\u0dd9\\u0dbb\\u0dd2 # \\u0d9c\\u0dbd\\u0dca\\u0db6\\u0ddd\\u0dad\\u0dbd\\u0dca # GenElecSL # SriLanka # TamilNadu # Election2015\",\n \"@USER @USER \\u0d9a\\u0dcf\\u0dbd\\u0d9a\\u0db1\\u0dca\\u0db1\\u0dd2 \\u0d95\\u0d9a\\u0dd4\\u0db1\\u0d9c\\u0dd9 \\u0db4\\u0dd4\\u0d9a \\u0db8\\u0dc4\\u0dbd\\u0dcf \\u0db6\\u0da9\\u0dba\\u0db1\\u0dca\\u0db1 \\u0db6\\u0dd9\\u0dc4\\u0dd9\\u0dad\\u0dca \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0d95\\u0db1\\u0dcf\",\n \"\\u0dc3\\u0dd3\\u0dbd\\u0dcf\\u0dc0\\u0dad\\u0dd4\\u0dbb \\u0db1\\u0dcf / \\u0dc3\\u0dd9\\u0db6\\u0dc5\\u0dd4 \\u0dad\\u0dd2\\u0daf\\u0dd9\\u0db1\\u0d9a\\u0dd4\\u0da7 \\u0db4\\u0dc4\\u0dbb\\u0daf\\u0dd3 \\u0dc3\\u0dd9\\u0db6\\u0dbd\\u0dd9\\u0d9a\\u0dca \\u0dc4\\u0dd2\\u0dbb\\u0d9a\\u0dbb\\u0dba\\u0dd2 \\u0db6\\u0dda\\u0dbb\\u0dcf\\u0d9c\\u0dad\\u0dca\\u0dad\\u0dd9 \\u0db1\\u0dd3\\u0dad\\u0dd2\\u0db8\\u0dba \\u0db4\\u0dd2\\u0dba\\u0dc0\\u0dbb \\u0db1\\u0ddc\\u0d9c\\u0db1\\u0dca\\u0db1 \\u0db4\\u0ddc\\u0dbb\\u0ddc\\u0db1\\u0dca\\u0daf\\u0dd4\\u0dc0 \\u0db8\\u0dad\\u0dba \\u0db1\\u0ddc\\u0daf\\u0d9a\\u0dd2\\u0db1\\u0dca URL via @USER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"df =df[:100]"
],
"metadata": {
"id": "LX0T74ZtM9j9"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df[\"label\"] = int(0)"
],
"metadata": {
"id": "60iL_I8ONCts"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "SE4yeguZNIo-",
"outputId": "110eb559-0dd0-4f2e-cb1c-694100365a31"
},
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" text domain label\n",
"0 @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ 0 0\n",
"1 @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... 0 0\n",
"2 ඒකි ඒම නෑ බං # jaysays 0 0\n",
"3 @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... 0 0\n",
"4 කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... 0 0"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" text | \n",
" domain | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" @USER ඔයාලට වගේ කැත තඩි බඩවල් අපිට නැ | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" @USER , @USER @USER ඒක තමයි දේසිකා එහෙනම් මට ක... | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" ඒකි ඒම නෑ බං # jaysays | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" @USER ඔයා නං හෙන ගොං අප්ප . මෝඩ කමෙන්ට්සමයි දා... | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" කවුරුහරි මොනවහරි ගෙනාවම , අපිත් කාල අම්මට ඉතිර... | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df",
"summary": "{\n \"name\": \"df\",\n \"rows\": 100,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 100,\n \"samples\": [\n \"\\u0d89\\u0dc3\\u0dca\\u0dc3\\u0dbb \\u0d8b\\u0db6 \\u0dc0\\u0dbd\\u0dd2\\u0dba\\u0d9a\\u0dca \\u0db1\\u0db8\\u0dca \\u0dbd\\u0ddc\\u0dc0\\u0dd9\\u0dad\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0ddc \\u0d9c\\u0dd2\\u0dc4\\u0dcf\\u0db1\\u0dca \\u0d85\\u0dba\\u0dd2\\u0dba\\u0dcf \\u0d89\\u0d9a\\u0dca\\u0db8\\u0db1\\u0da7 \\u0dc0\\u0dbb\\u0dd9\\u0db1\\u0dca \\u0d9a\\u0dd2\\u0dba\\u0dbd\\u0dcf \\u0d85\\u0da9 \\u0d9c\\u0dc4\\u0dbd\\u0dcf \\u0d9a\\u0dd2\\u0dba\\u0db1\\u0dca\\u0db1\\u0dda \\u0dad\\u0ddc \\u0daf\\u0dd0\\u0db1\\u0dca \\u0dc0\\u0dd9\\u0db1\\u0dc3\\u0dca \\u0d9a\\u0db8\\u0dca \\u0d9a\\u0dbb\\u0db1\\u0dc0\\u0dcf \\u0db8\\u0db1\\u0dca \\u0dad\\u0ddc\\u0da7 \\u0dc0\\u0ddb\\u0dbb\\u0dba\",\n \"\\u0d85\\u0db1\\u0dd4\\u0dbb\\u0d9c\\u0dd9 \\u0dc3\\u0da7\\u0dca\\u0da7\\u0dd0\\u0db9\\u0dd2\\u0dba\\u0db1\\u0dca\\u0d9c\\u0dd9 \\u0d89\\u0dad\\u0dca\\u0dad\\u0dd1\\u0dc0\\u0ddc \\u0daf\\u0dd4\\u0d91\\u0d85\\u0db1 \\u0d91\\u0d9a \\u0d85\\u0dc4\\u0db1\\u0dca\\u0db1\\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca\\u0daf \\u0dad\\u0dc0 \\u0db6\\u0dd0\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0da7\\u0dca\\u0d9c\\u0dc0 \\u0db8\\u0da9\\u0d9c\\u0dc4\\u0db4\\u0dbd\\u0dca\\u0dbd \\u0d8b\\u0db9\\u0dbd\\u0d9c\\u0dd9 \\u0dc3\\u0d9a\\u0dca\\u0d9a\\u0dd2\\u0dbd\\u0dd2 \\u0db4\\u0dbb \\u0d9c\\u0dad\\u0dd2 \\u0dbd\\u0ddd\\u0d9a\\u0dd9\\u0da7\\u0db8 \\u0db4\\u0dd9\\u0db1\\u0dca\\u0db1\\u0db4\\u0dbd\\u0dca\\u0dbd . 
% \\u0da7 \\u0d87\\u0daf\\u0dbd \\u0daf\\u0dd0\\u0db8\\u0dca\\u0db8\\u0dd9\\u0dad\\u0dca \\u0db8\\u0dda\\u0d9a\\u0dd9 \\u0d87\\u0db8\\u0db1\\u0dd9\\u0db1 \\u0dc3\\u0dd2\\u0db4\\u0dca\\u0db4\\u0dd2\\u0d9a\\u0da7\\u0dd4\",\n \"\\u0db8\\u0dbd\\u0dca\\u0dbd\\u0dd2\\u0d9c\\u0dd9 \\u0d8b\\u0db4\\u0db1\\u0dca\\u0daf\\u0dd2\\u0db1\\u0dda\\u0da7 \\u0dc4\\u0dd9\\u0da7 \\u0daf\\u0dd9\\u0db1\\u0dca\\u0db1 \\u0daf\\u0dd9\\u0dba\\u0d9a\\u0dca \\u0d9a\\u0dbd\\u0dca\\u0db4\\u0db1\\u0dcf \\u0d9a\\u0dbb \\u0d9a\\u0dbb \\u0d89\\u0daf\\u0dca\\u0daf\\u0dd2 \\u0dbd\\u0ddc\\u0d9a\\u0dca\\u0d9a\\u0dcf \\u0daf\\u0dd4\\u0db1\\u0dca\\u0db1 \\u0dc3\\u0db4\\u0dca\\u200d\\u0dbb\\u0dcf\\u0dba\\u0dd2\\u0dc3\\u0dca \\u0d91\\u0d9a \\u0db1\\u0db8\\u0dca \\u0db4\\u0da7\\u0dca\\u0da7 . . \\u0dbb\\u0dad\\u0dd2\\u0da4\\u0dca\\u0da4\\u0dcf \\u0db4\\u0dd9\\u0da7\\u0dca\\u0da7\\u0dd2\\u0dba\\u0d9a\\u0dd2\\u0db1\\u0dca \\u0dc2\\u0dda\\u0db4\\u0dca \\u0d8b\\u0db1\\u0dd1\\u0d9a\\u0dd2 \\u0dc4\\u0dd0\\u0db8\\u0dd2\\u0db1\\u0dda\\u0dc2\\u0db1\\u0dca ourNation HappyBirthday\\u0d92\\u0d85\\u0dc3\\u0dca\\u0dc3\\u0dda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"domain\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"source": [
"print(len(df))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zTRfsZ_tNLif",
"outputId": "d0012de3-5298-4be5-b280-dee66208a034"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"100\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"path = '/content/drive/MyDrive/sinhala-dataset'\n",
"testing_json = \"/testing.json\""
],
"metadata": {
"id": "weZ2_xujNW1b"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df.to_json(path + testing_json, orient='records')\n"
],
"metadata": {
"id": "HzAfca0LNUDx"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 400,
"referenced_widgets": [
"1edd00396f2d45a7b32079d43bc62634",
"a96994a464df43918566f6cc967e7148",
"92143f1854c44349a3d0f6b7838b6a5c",
"b90793e5e29c435cab6fd7b1e059c992",
"8ce96d68c1e443b28e1200f106fefb02",
"dcfa2646664e449c98a00e89b2b7984d",
"28bcfe01e8a64ba08ce62e9715ad85e4",
"1b72e7f7e85a49fb8c7a79bce1989647",
"76523fc98b644aaaaf6c605544e9fffb",
"b2d2a9eabbe14cddaa7d0aa39e7a1953",
"bf4db198f72441b48a5dbff8515a1f91",
"48e8e488c27a4948a455835f6caf2ce2",
"2fb5a8ac30ed49df93056bc6802e8ee0",
"c29fcea40de347bf9f274f375b9123a5",
"00cd02215e1f4225a4cf93b46b9a7e15",
"e1064a6f8bfd4435a6ad15d08ff44699",
"956884e6e8bc43f4bc51a2b75c131889",
"4e23a4eec35f4f8ea3114d9cb0ea1e04",
"e7a3964adec34bf6b37f52cf1119fa9c",
"22a128f583aa4514a1e71d0f8aaf8e79",
"3f902b8cb652446c84609cd730a64e35",
"a78b7664a1e346f181b203bb1645eb9b",
"3bf7edeed06a4ef3b1ce28f24201c84b",
"205cbd07f34345c48b1e72bc2cb9a93b",
"a2d328e2313a49aab752cd2ba38220b5",
"a3696eb89c4e434683bb5416d91602db",
"e04dfc9c2e5f437c8dd9b15f33c04a4a",
"1f5b368654494327bfc8d1c315f13832",
"853eb13a56ec4fbf89e25f333798132e",
"5713250ff3864029a3668c6a7eb1f3e3",
"944ed217567144459ea5279c34f529f3",
"1e0faaa1a09f42f8964a3203472f50c2",
"7c3da939876e4a6f8f2969fbf96bbcd0",
"aacafd29b7b5403bb8a7df1ebe2a731e",
"ec944b4365c34ab6813af9d925e2a552",
"8d48d360da5945bfbf300ae455043c07",
"36f7f5ae075f4c59a44283e25088eaab",
"8cfa54ddcf354e0e9f71102656a744cf",
"edcb22216cac4bcf83a301d975f20d2d",
"3ace28614fe446f18268578e56b5ec14",
"3c8f3c862f744ddba9524079c636124a",
"36eb36dba13d4ad4a73b401e0dc22c42",
"236bb6124df1443684b16dd34fba2ed4",
"3264137a43cc4725948aab030421b24c",
"dd76625672d74095a0f691206646fbd8",
"a10c0e99afb546d79fca304e8a8e6ab1",
"6fa7911781ef42949e56c80dc1f85299",
"832e08b2b4524680bca9d71c363f3232",
"10ec65b4df86458ba2eee2eeccdc91e6",
"764921df2d7b438b8a8ad0d7b68b8b6e",
"12f7b959b26b472099ea4e06b606772a",
"32c6cb10b1e946a89a7b08505c1582e9",
"3ee11622c9a0405abf5f246720d358b2",
"d78ced95d2134e299573c1e8d712b3f8",
"6ff7b4b691a74f6ab232c26ee55b9982",
"2cc542103450405a853945ff07471932",
"5807fb12507f4fc0aaa4d083477aba27",
"11d2ce4ace194bb1825fefacf3cc36f0",
"b22724e628384993aa52c255c8bffc6f",
"e4dc10b2ef9c4a1f99689ebe9d48886a",
"6b580a221e2f48b5b058c6a6189cc99d",
"e5f30b22e01c4b74a2f20f3b9880d7ae",
"e353ffe187d94729a65453fcd3d8a9a3",
"b056d5ef8b9244828e10f685e38d47bb",
"32ce45ecf0d64c81bea12692ad52ed45",
"b930858da1a2407ba49a8a8a17a1fc70",
"d366271fc98943aa8fe3da314c1e95dc",
"364ab880ac304099b1bb83a92d6a7eed",
"4a8d5b1ca09d493c8b93baf92e7fd5ae",
"c6867a46aa064e26831be8a3a4278905",
"053fc43d2aad4aeeb114b8fd9aa2aef6",
"a9583dca84a14e9bb89e58a430c391e3",
"15b6e4ff685f4f5e8c997e151d2b4007",
"d73e215040114141bc60b0f58a1f8646",
"18ab1cf13c494b02af122ee0069e3c92",
"839cabb3e69c46549665769afcc24e08",
"56fe41cf83454378a9345ee4e9a26192"
]
},
"id": "ROUE4LV1d_tp",
"outputId": "7f3d865e-97c6-434e-a8ae-f69a3462586a"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:80: UserWarning: \n",
"Access to the secret `HF_TOKEN` has not been granted on this notebook.\n",
"You will not be requested again.\n",
"Please restart the session if you want to be prompted again.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/1.38k [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "1edd00396f2d45a7b32079d43bc62634"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"vocab.json: 0%| | 0.00/1.50M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "48e8e488c27a4948a455835f6caf2ce2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"merges.txt: 0%| | 0.00/1.15M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "3bf7edeed06a4ef3b1ce28f24201c84b"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer.json: 0%| | 0.00/3.53M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "aacafd29b7b5403bb8a7df1ebe2a731e"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/957 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "dd76625672d74095a0f691206646fbd8"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config.json: 0%| | 0.00/696 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "2cc542103450405a853945ff07471932"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"pytorch_model.bin: 0%| | 0.00/506M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d366271fc98943aa8fe3da314c1e95dc"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
" return self.fget.__get__(instance, owner)()\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 14
}
],
"source": [
"MODEL_SAVE_PATH = \"/content/drive/MyDrive/models-path-improvement/last-epoch-model-2024-03-08-15_34_03_6.pth\"\n",
"\n",
"max_len, bert = 160 , 'sinhala-nlp/sinbert-sold-si'\n",
"tokenizer = TokenizerFromPreTrained(max_len, bert)\n",
"\n",
"# dataset\n",
"batch_size = 100\n",
"\n",
"\n",
"testing_path = path + testing_json\n",
"\n",
"testing_set = TextDataset(testing_path, ['text'], tokenizer)\n",
"testing_loader = DataLoader(testing_set, batch_size, shuffle=False)\n",
"\n",
"# prepare model\n",
"domain_num = 3\n",
"\n",
"model = MDFEND(bert, domain_num , expert_num=18 , mlp_dims = [5080 ,4020, 3010, 2024 ,1012 ,606 , 400])\n",
"model.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=torch.device('cpu')))\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"outputs = []\n",
"for batch_data in testing_loader:\n",
" outputs.append(model.predict(batch_data))"
],
"metadata": {
"id": "nsTmmtm7ENK7"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"outputs"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MgJFRW6uOTNK",
"outputId": "37176cb8-b2e6-4c3b-c852-c4e3a17cda30"
},
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[tensor([1.3248e-03, 2.0616e-01, 4.5341e-02, 9.3156e-01, 2.3167e-01, 9.9967e-01,\n",
" 6.8980e-02, 8.8265e-01, 4.6962e-01, 1.4711e-01, 3.9079e-01, 1.5254e-02,\n",
" 1.4336e-01, 9.9974e-01, 9.4320e-02, 9.6368e-01, 3.0400e-01, 1.1099e-02,\n",
" 8.6662e-01, 9.0376e-02, 4.0686e-01, 9.9839e-01, 9.9700e-01, 4.9826e-02,\n",
" 9.6036e-01, 3.1445e-02, 7.8756e-01, 5.2800e-01, 9.4090e-01, 9.9148e-01,\n",
" 9.9725e-01, 1.6041e-02, 2.9223e-01, 1.5572e-01, 7.2350e-02, 8.2344e-02,\n",
" 5.4701e-03, 7.9817e-01, 1.6082e-03, 2.3789e-01, 2.0766e-02, 9.8514e-01,\n",
" 1.4062e-02, 9.8410e-01, 5.0685e-01, 1.0039e-01, 3.5957e-01, 4.6990e-01,\n",
" 6.0348e-01, 5.4888e-01, 9.7326e-02, 1.4647e-03, 2.0198e-02, 9.9995e-01,\n",
" 8.6098e-01, 7.3051e-01, 3.0538e-03, 9.9967e-01, 5.4075e-03, 2.4586e-02,\n",
" 2.1326e-01, 9.9988e-01, 7.7565e-01, 7.3468e-01, 8.2214e-02, 8.3052e-03,\n",
" 3.7278e-01, 3.6124e-01, 2.4839e-01, 2.4560e-01, 3.9281e-02, 9.9611e-01,\n",
" 2.3351e-02, 1.9584e-01, 1.1381e-01, 2.4559e-01, 6.5344e-01, 3.5736e-01,\n",
" 8.6219e-04, 3.8071e-01, 5.6490e-01, 2.5499e-02, 6.1897e-02, 9.0802e-01,\n",
" 8.2842e-02, 8.5183e-04, 6.4453e-01, 6.0612e-01, 3.8544e-01, 4.0832e-02,\n",
" 6.0973e-01, 4.7808e-02, 7.0927e-01, 8.4603e-01, 5.6889e-01, 5.3337e-01,\n",
" 1.2113e-01, 8.6022e-01, 8.5642e-03, 9.9990e-01],\n",
" grad_fn=)]"
]
},
"metadata": {},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"source": [
"# 1 ====> offensive\n",
"# 0 ====> not offensive\n",
"label = []\n",
"for output in outputs:\n",
" for out in output:\n",
" output_prob = out.item()\n",
" if output_prob >= 0.5:\n",
" label.append(1)\n",
" else:\n",
" label.append(0)\n"
],
"metadata": {
"id": "ySdut6vMOvQY"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"source": [
"label"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8thgk3ykPTOD",
"outputId": "fb05dd91-01d4-44a8-ef66-caf0eda24831"
},
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 1,\n",
" 0,\n",
" 1,\n",
" 0,\n",
" 1]"
]
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "P4eIsw1DPYQG"
},
"execution_count": null,
"outputs": []
}
]
}