{ "nbformat": 4, "nbformat_minor": 5, "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "colab": { "name": "evaluation.ipynb", "provenance": [], "collapsed_sections": [] }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "b3ac84dcf48f4ba8a65aecd1df5a1b68": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_490a5358971a45fa92989776dc6757c3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f5be7a9c6f1941659c806255a9315b7c", "IPY_MODEL_9eb5033009ee441eb164d862b4b2c39c" ] } }, "490a5358971a45fa92989776dc6757c3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, 
"max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f5be7a9c6f1941659c806255a9315b7c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_ed07265cff0c4c82b95e0d0c47359edb", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 618, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 618, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_46a87c4067114a31ae6e34c3a9464f76" } }, "9eb5033009ee441eb164d862b4b2c39c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_04a42f0ccb10413494e891907fc547d9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 618/618 [00:01<00:00, 464B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2188756da7e74badb48a459ea15c02d5" } }, "ed07265cff0c4c82b95e0d0c47359edb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "46a87c4067114a31ae6e34c3a9464f76": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "04a42f0ccb10413494e891907fc547d9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "2188756da7e74badb48a459ea15c02d5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a1db2167447249ed95dfb78a97c24bf9": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_68ce97d8904e4b398af538a7ad1ed1ea", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_245bfeb4fb5542f9900cebd0e3cccc74", "IPY_MODEL_501c0ab625ed4f0bb9954de0b97e90f1" ] } }, "68ce97d8904e4b398af538a7ad1ed1ea": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "245bfeb4fb5542f9900cebd0e3cccc74": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f8140d5334e14d7988c17bbbae05b08e", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1388356, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1388356, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_0b6cce6d1a0e4de48ae7eb967dfeda87" } }, "501c0ab625ed4f0bb9954de0b97e90f1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_bac6501d76ce4b5ba56ab1effcedccbb", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1.39M/1.39M [00:00<00:00, 5.74MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_cf17e5af2e7e407e9d96e0325ad733de" } }, "f8140d5334e14d7988c17bbbae05b08e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "0b6cce6d1a0e4de48ae7eb967dfeda87": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bac6501d76ce4b5ba56ab1effcedccbb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "cf17e5af2e7e407e9d96e0325ad733de": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "d603ce6680d3425e8c145e77bc0e0e30": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_bbd848e83f30482dab926e53a7188f37", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_29c28419ac2848ca8ce6f73eea0e3425", "IPY_MODEL_ac01f9efc2ec4eb2af6dfd956467ab8e" ] } }, "bbd848e83f30482dab926e53a7188f37": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "29c28419ac2848ca8ce6f73eea0e3425": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_82c474271a584a46b9af812bd9947ff7", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5dea54f5878c4ea1b91aea2d6c01dcc9" } }, "ac01f9efc2ec4eb2af6dfd956467ab8e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_1f5a386436f142999c9cd61a5567167f", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:01<00:00, 1.82ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4d13f40b545245d88a9ce9cfa738a59c" } }, "82c474271a584a46b9af812bd9947ff7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "5dea54f5878c4ea1b91aea2d6c01dcc9": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1f5a386436f142999c9cd61a5567167f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4d13f40b545245d88a9ce9cfa738a59c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bacd59325efe4e7a8289e2e77eca3f97": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b6493940baf04dd6a461abd3d123d20d", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_1b5c2a40effd4df8a0e29f7589e4ce69", "IPY_MODEL_516c9f479605404886582c4af2d4860d" ] } }, "b6493940baf04dd6a461abd3d123d20d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1b5c2a40effd4df8a0e29f7589e4ce69": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_7d5cce86ee7c4016a6cff03887038660", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_36aa47fe9e22473ea71a0b6a4d740b35" } }, "516c9f479605404886582c4af2d4860d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b98d5bb115ce4458919bc628a7c453cc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:01<00:00, 1.96ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_6437a7817c244f54b4ca06d78d6aeff7" } }, "7d5cce86ee7c4016a6cff03887038660": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "36aa47fe9e22473ea71a0b6a4d740b35": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b98d5bb115ce4458919bc628a7c453cc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "6437a7817c244f54b4ca06d78d6aeff7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "3ac65aaeae574af5b7eea30b4a873ec2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_8d99822c6a514136aa164846d039bbfc", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_105f722c571f47b2b7a7184c9fe45c18", "IPY_MODEL_4a6ab6cdeb0943ba8cf8486caadf2f8d" ] } }, "8d99822c6a514136aa164846d039bbfc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "105f722c571f47b2b7a7184c9fe45c18": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_4caf7f4e912d454b8ecf3f3971ede95e", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_36873c0c51904573809bb3eadd172383" } }, "4a6ab6cdeb0943ba8cf8486caadf2f8d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_15d31d2a49804328b4c80fa98dae8ff1", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:00<00:00, 2.03ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7fa7cbad599741edbf0c099bf2668494" } }, "4caf7f4e912d454b8ecf3f3971ede95e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "36873c0c51904573809bb3eadd172383": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "15d31d2a49804328b4c80fa98dae8ff1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "7fa7cbad599741edbf0c099bf2668494": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f19e664d094f46f0a94d7410685d67eb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_74626cdc554848fab75607aed0324aa3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f2bcfb2929594116b9367a18a1778aa2", "IPY_MODEL_709d06d2c8614f9b97bf86ce8ed1118f" ] } }, "74626cdc554848fab75607aed0324aa3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f2bcfb2929594116b9367a18a1778aa2": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_65ca9ae8108d46a1999573c3267f16bc", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_520e4029bca14128a6eb4e2bbd4c78ed" } }, "709d06d2c8614f9b97bf86ce8ed1118f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_b8e8db36327e4706b6ab435a698cb3fd", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 2/2 [00:00<00:00, 2.03ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7d948bdb11c0427b8c49c48e7c5d9772" } }, "65ca9ae8108d46a1999573c3267f16bc": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "520e4029bca14128a6eb4e2bbd4c78ed": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b8e8db36327e4706b6ab435a698cb3fd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "7d948bdb11c0427b8c49c48e7c5d9772": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, 
"grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1de82500b0d34e5c9f6c5f995f27ea03": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_c7e9741c804e421898f9c45cde1ce7cd", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_5178228787094bfba3b671af67e3df0f", "IPY_MODEL_3aa19fa9dda74118844e11a876039a0b" ] } }, "c7e9741c804e421898f9c45cde1ce7cd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, 
"order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "5178228787094bfba3b671af67e3df0f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_2afa7d80cc004014b4a672bc0b683fce", "_dom_classes": [], "description": "Downloading: ", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2482, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2482, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b2757df8c2e14c7d90befd479221e5c5" } }, "3aa19fa9dda74118844e11a876039a0b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_9ada8d8e30114064be38f3b5d4645f36", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 6.34k/? 
[00:27<00:00, 228B/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_a391191463d0455c8fd4f83b8ae69c8f" } }, "2afa7d80cc004014b4a672bc0b683fce": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "b2757df8c2e14c7d90befd479221e5c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9ada8d8e30114064be38f3b5d4645f36": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "a391191463d0455c8fd4f83b8ae69c8f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "47aef8e786ea416e8fa99869a46d008f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_eb587b328a7e4a88bffad029b4943a1e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_bf71a524605b4a63b33468e43b212b62", "IPY_MODEL_e9a69e05e82e48feb3f203e8ac7b7afa" ] } }, "eb587b328a7e4a88bffad029b4943a1e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bf71a524605b4a63b33468e43b212b62": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f3fa0e5bd0b34ffe92627a1615447d79", "_dom_classes": [], "description": "Downloading: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 498796983, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 498796983, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_03b37083ef7642f2942dd97b6e090c33" } }, "e9a69e05e82e48feb3f203e8ac7b7afa": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_18d6f6e2b90747258a2082796f2eeda9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
499M/499M [05:39<00:00, 1.47MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e33495db5c3343a999c9c6d807abc238" } }, "f3fa0e5bd0b34ffe92627a1615447d79": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "03b37083ef7642f2942dd97b6e090c33": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "18d6f6e2b90747258a2082796f2eeda9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "e33495db5c3343a999c9c6d807abc238": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ceb189e178b24e03b398b7ba37e63a02": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_7b85ed3a5f034cbcbb3d87d9e5ac807d", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_81f7f3a6c8dc4c9e8320fbcc8b4aa270", "IPY_MODEL_f1187ab3d4a1491dacbf7f109810bf3c" ] } }, "7b85ed3a5f034cbcbb3d87d9e5ac807d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "81f7f3a6c8dc4c9e8320fbcc8b4aa270": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_43d1cd650b26471cbf07beb1733b943a", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_71410f8bf2d348ea9f3f173ea70c7829" } }, "f1187ab3d4a1491dacbf7f109810bf3c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_f8ae53cbfe38498095d7f7e2ee6b1b89", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:01<00:00, 1.99ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_dd961e0fdc604f0883ac6a674617b7ac" } }, "43d1cd650b26471cbf07beb1733b943a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "71410f8bf2d348ea9f3f173ea70c7829": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f8ae53cbfe38498095d7f7e2ee6b1b89": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "dd961e0fdc604f0883ac6a674617b7ac": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e9a04e6cb6094f00b75a248aebc11dcf": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_3a118dd7adf3459db52afc86e39e4681", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_59dae515b4804e3f8a64372ec0cd5254", "IPY_MODEL_a835eb4e43174c178a43fc20cf878f67" ] } }, "3a118dd7adf3459db52afc86e39e4681": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "59dae515b4804e3f8a64372ec0cd5254": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_bbd988fc15734daf834d7c1be0898f54", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_59a87b9b20fd43e2ad4df96508e2a4e8" } }, "a835eb4e43174c178a43fc20cf878f67": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_0c06e7199e2440ff8df24c7f4d3816bf", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.01ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1721231f136a41a89226a6993e770708" } }, "bbd988fc15734daf834d7c1be0898f54": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "59a87b9b20fd43e2ad4df96508e2a4e8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0c06e7199e2440ff8df24c7f4d3816bf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "1721231f136a41a89226a6993e770708": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ea1a49df91de49b39d7c28620d396ed3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_f43ced84b4ae4d0290406ff29ac319b6", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_249eab1d100f4e66891404bd0293cfac", "IPY_MODEL_de5f4a79619b4361adb69ebf75f480d3" ] } }, "f43ced84b4ae4d0290406ff29ac319b6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "249eab1d100f4e66891404bd0293cfac": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_38baac16961b491d99a195ec08b7146c", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_31550c6938b245bdaff271f77e36a265" } }, "de5f4a79619b4361adb69ebf75f480d3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_86d213b1b67649e28127ca8919d1b508", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.09ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3833a9687f8440c8bc7159be7c5da6aa" } }, "38baac16961b491d99a195ec08b7146c": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "31550c6938b245bdaff271f77e36a265": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "86d213b1b67649e28127ca8919d1b508": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "3833a9687f8440c8bc7159be7c5da6aa": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0eb974fa255b4b378c19580c9b443242": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_72b317f6253640efa451b41609f418cc", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_9dbc6bdd1fcd4258b27b0b36f537a52b", "IPY_MODEL_48eab5fd5f5e4f4f9b1517a3f2e53052" ] } }, "72b317f6253640efa451b41609f418cc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9dbc6bdd1fcd4258b27b0b36f537a52b": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_562f962ecfe1438ba3bd67406c3bc415", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e5c84b81d24a4120ad364cf7a8d92b13" } }, "48eab5fd5f5e4f4f9b1517a3f2e53052": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_12ba4202432a4d9da9b4295bee72ffac", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
2/2 [00:00<00:00, 2.55ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4b846d71afd943278c1ac0cecfa13f65" } }, "562f962ecfe1438ba3bd67406c3bc415": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "e5c84b81d24a4120ad364cf7a8d92b13": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "12ba4202432a4d9da9b4295bee72ffac": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4b846d71afd943278c1ac0cecfa13f65": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "c23a9b479ca54b5082ced5e0359f54d9": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_ce19ae483e1c4a9b90b3fecfea1330e4", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_e953e23fd1364232b6cd39ec11ccaecb", "IPY_MODEL_75b304f6ca5945b295a64fafcf283c4e" ] } }, "ce19ae483e1c4a9b90b3fecfea1330e4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e953e23fd1364232b6cd39ec11ccaecb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_cb932f442d0c4c8b959e455cc0ce1329", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5fa39708c9684ce3b93577f1a0197bf4" } }, "75b304f6ca5945b295a64fafcf283c4e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_4e7a38f355d84811a66936be929d270d", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 4.83ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_50499dbed5cb470a9b5dd7f4e5144405" } }, "cb932f442d0c4c8b959e455cc0ce1329": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "5fa39708c9684ce3b93577f1a0197bf4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4e7a38f355d84811a66936be929d270d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "50499dbed5cb470a9b5dd7f4e5144405": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "1d3024c3e0374116bb856902590f6bfd": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_1a406ffa02dc479aa0fcdfa41afcbb0b", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_9f6792ca58ed4906bb671ae049e2b860", "IPY_MODEL_54ef5c70c3f146e1a1fbd82f8aeedbb9" ] } }, "1a406ffa02dc479aa0fcdfa41afcbb0b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9f6792ca58ed4906bb671ae049e2b860": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_ed8c1a4ff520407eb35402fb4e5fcc71", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_68819af7768e486da1752ff27d2e4104" } }, "54ef5c70c3f146e1a1fbd82f8aeedbb9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_4039c305be024488abecf4a635345c46", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 2.59ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_80622e53e4124d0cbd506542f81f4818" } }, "ed8c1a4ff520407eb35402fb4e5fcc71": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "68819af7768e486da1752ff27d2e4104": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4039c305be024488abecf4a635345c46": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "80622e53e4124d0cbd506542f81f4818": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2cad6e60bb414f46b79720dfc4f87f9e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_fc6b7d2794bb44cdabafdfba208b8dc3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_f8fd5a767ff3404ebbadc6cc63110445", "IPY_MODEL_8b638f038ef04ea7b2ad3bdcb90f0141" ] } }, "fc6b7d2794bb44cdabafdfba208b8dc3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f8fd5a767ff3404ebbadc6cc63110445": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_305833c39a074d0296ef4511b6f61e7e", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f31cad3887d640d8b4322598eca1ca70" } }, "8b638f038ef04ea7b2ad3bdcb90f0141": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_02ecdd0d12614752a19b294395cb821c", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.82ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_4c62a1fd18a54327a1f50bf3ca087027" } }, "305833c39a074d0296ef4511b6f61e7e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "f31cad3887d640d8b4322598eca1ca70": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "02ecdd0d12614752a19b294395cb821c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "4c62a1fd18a54327a1f50bf3ca087027": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9daad6aa599343278ca5198897b390b3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b803d1d51df9466db862f96fb8e49257", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_266b34a7ee284ac590ffa999f1351330", "IPY_MODEL_93a03570b7bf4f3f8494f7b5677a7c70" ] } }, "b803d1d51df9466db862f96fb8e49257": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "266b34a7ee284ac590ffa999f1351330": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_543ff4b83d804f52afbcb7a57ca22570", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_468d17728336411db088c69330f4de68" } }, "93a03570b7bf4f3f8494f7b5677a7c70": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_fe17854a2dbe4949aeb4efb309cab6fc", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.93ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_a1feb0e41e2f47e28f550ed81b61e952" } }, "543ff4b83d804f52afbcb7a57ca22570": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "468d17728336411db088c69330f4de68": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "fe17854a2dbe4949aeb4efb309cab6fc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "a1feb0e41e2f47e28f550ed81b61e952": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ca9eb397fcd640b48838e58579b475bc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_4234b3e6815040af90d298f6ab2a808e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_30739849e0db45daa664d8992ab6f458", "IPY_MODEL_24dc9ed72f8048fe8b174f2f1ef6e2ca" ] } }, "4234b3e6815040af90d298f6ab2a808e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "30739849e0db45daa664d8992ab6f458": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_641876e099e84c44854262d5e709409e", "_dom_classes": [], "description": "Running tokenizer on dataset #0: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f8ab57bd0aba4f608a8f7549baa4ead4" } }, "24dc9ed72f8048fe8b174f2f1ef6e2ca": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_75b2a86adb434e8cb3358a3461be99e6", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 2.70ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e005269e95704fae8ff9ebe8f932d023" } }, "641876e099e84c44854262d5e709409e": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "f8ab57bd0aba4f608a8f7549baa4ead4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "75b2a86adb434e8cb3358a3461be99e6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "e005269e95704fae8ff9ebe8f932d023": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0217f8a6209948cb9334eeaf5c489b1b": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_8589613c58db44bc959fc43c003dc689", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_168ee56a9f65422ca6c503ea29e8a825", "IPY_MODEL_b5f3ea592806493abe87f29fbe8059fb" ] } }, "8589613c58db44bc959fc43c003dc689": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "168ee56a9f65422ca6c503ea29e8a825": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_a53357f0785046afbbfcc408a5aa0f19", "_dom_classes": [], "description": "Running tokenizer on dataset #1: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_3c4b3cd5af584ad584da81f19a26eec0" } }, "b5f3ea592806493abe87f29fbe8059fb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_9097e5ba3c00454c823871d38393907b", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 3.40ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_5d0f563e826c4ef79ff1ed6cc7ab1c01" } }, "a53357f0785046afbbfcc408a5aa0f19": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "3c4b3cd5af584ad584da81f19a26eec0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9097e5ba3c00454c823871d38393907b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "5d0f563e826c4ef79ff1ed6cc7ab1c01": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9de41b790b2c4df88309b8e6e66972cc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_26274dbb3a324f3ca7a2a91ef24adb83", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_783f5d6b893d4f2e968c249605773c9e", "IPY_MODEL_1a597fd54ab04a4cb80147e93abc72b8" ] } }, "26274dbb3a324f3ca7a2a91ef24adb83": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "783f5d6b893d4f2e968c249605773c9e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f317aee64c0f43a08c8157413d7956e1", "_dom_classes": [], "description": "Running tokenizer on dataset #2: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1f37c1324fba4488a0d19cc1d623f887" } }, "1a597fd54ab04a4cb80147e93abc72b8": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_584ec18135294eb9a916c85385eb4bea", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 4.69ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_8e67b29112a04c48a978cf648904ee61" } }, "f317aee64c0f43a08c8157413d7956e1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "1f37c1324fba4488a0d19cc1d623f887": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "584ec18135294eb9a916c85385eb4bea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "8e67b29112a04c48a978cf648904ee61": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ece5245bfb5c4b02aa1e2527974f1cc6": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_ab4e0611c3d7428b9ffa703f794852a7", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_0df9c4b4ed0240f2871906f72e9e9839", "IPY_MODEL_5731d37f432b4713b9b8be897b1f6686" ] } }, "ab4e0611c3d7428b9ffa703f794852a7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": 
null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0df9c4b4ed0240f2871906f72e9e9839": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_fa9d6f6c384241f192660b2668956ad4", "_dom_classes": [], "description": "Running tokenizer on dataset #3: 100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_c04cd9397ecf4de69d5e7fc39311629c" } }, "5731d37f432b4713b9b8be897b1f6686": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_669706d500bc404ca65e16f939db02c5", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 
1/1 [00:00<00:00, 5.11ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_fc71353cc3b5434ba3c32d5c64069757" } }, "fa9d6f6c384241f192660b2668956ad4": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "c04cd9397ecf4de69d5e7fc39311629c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "669706d500bc404ca65e16f939db02c5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "fc71353cc3b5434ba3c32d5c64069757": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QnKI9cdUFw23", "outputId": "82086023-d380-4e1a-f11a-b0047bb10190" }, "source": [ "!pip install danlp transformers datasets numpy flax seqeval" ], "id": "QnKI9cdUFw23", "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Collecting danlp\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/62/6b/3a245c069f0a5376e565d67c2f9fb04a39e4d7e94c93c2d27e57c7bf9012/danlp-0.0.12-py3-none-any.whl (71kB)\n", "\r\u001b[K |████▋ | 10kB 16.0MB/s eta 0:00:01\r\u001b[K |█████████▏ | 20kB 22.1MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 30kB 25.0MB/s eta 0:00:01\r\u001b[K |██████████████████▍ 
| 40kB 27.0MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 51kB 29.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 61kB 29.9MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 71kB 9.4MB/s \n", "\u001b[?25hCollecting transformers\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/fd/1a/41c644c963249fd7f3836d926afa1e3f1cc234a1c40d80c5f03ad8f6f1b2/transformers-4.8.2-py3-none-any.whl (2.5MB)\n", "\u001b[K |████████████████████████████████| 2.5MB 33.3MB/s \n", "\u001b[?25hCollecting datasets\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/86/27/9c91ddee87b06d2de12f134c5171a49890427e398389f07f6463485723c3/datasets-1.9.0-py3-none-any.whl (262kB)\n", "\u001b[K |████████████████████████████████| 266kB 42.9MB/s \n", "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.19.5)\n", "Collecting flax\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f6/21/21ca1f4831ac24646578d2545c4db9a8369b9da4a4b7dcf067feee312b45/flax-0.3.4-py3-none-any.whl (183kB)\n", "\u001b[K |████████████████████████████████| 184kB 49.2MB/s \n", "\u001b[?25hCollecting seqeval\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)\n", "\u001b[K |████████████████████████████████| 51kB 4.7MB/s \n", "\u001b[?25hRequirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (from danlp) (3.10.0)\n", "Collecting conllu\n", " Downloading https://files.pythonhosted.org/packages/ae/be/be6959c3ff2dbfdd87de4be0ccdff577835b5d08b1d25bf7fd4aaf0d7add/conllu-4.4-py2.py3-none-any.whl\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from danlp) (1.1.5)\n", "Collecting pyconll\n", " Downloading https://files.pythonhosted.org/packages/0a/4c/edf12b4b211f8a0f7f85a52ed4b50cd453ac96e9b751427e0296eb7ae42a/pyconll-3.1.0-py3-none-any.whl\n", 
"Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from danlp) (4.41.1)\n", "Collecting sacremoses\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)\n", "\u001b[K |████████████████████████████████| 901kB 46.1MB/s \n", "\u001b[?25hCollecting huggingface-hub==0.0.12\n", " Downloading https://files.pythonhosted.org/packages/2f/ee/97e253668fda9b17e968b3f97b2f8e53aa0127e8807d24a547687423fe0b/huggingface_hub-0.0.12-py3-none-any.whl\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from transformers) (3.13)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n", "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (4.6.0)\n", "Collecting tokenizers<0.11,>=0.10.1\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)\n", "\u001b[K |████████████████████████████████| 3.3MB 36.7MB/s \n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", "Collecting fsspec>=2021.05.0\n", "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/0e/3a/666e63625a19883ae8e1674099e631f9737bd5478c4790e5ad49c5ac5261/fsspec-2021.6.1-py3-none-any.whl (115kB)\n", "\u001b[K |████████████████████████████████| 122kB 52.7MB/s \n", "\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", "Collecting xxhash\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/4f/0a862cad26aa2ed7a7cd87178cbbfa824fc1383e472d63596a0d018374e7/xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243kB)\n", "\u001b[K |████████████████████████████████| 245kB 49.9MB/s \n", "\u001b[?25hRequirement already satisfied: msgpack in /usr/local/lib/python3.7/dist-packages (from flax) (1.0.2)\n", "Requirement already satisfied: jax>=0.2.13 in /usr/local/lib/python3.7/dist-packages (from flax) (0.2.13)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from flax) (3.2.2)\n", "Collecting optax\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/07/48/4f65dbb5ec096917ec039ba2c7eccf97ee05a4157e0e965a45ed3b7a13f9/optax-0.0.9-py3-none-any.whl (118kB)\n", "\u001b[K |████████████████████████████████| 122kB 53.9MB/s \n", "\u001b[?25hRequirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n", "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.15.0)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.3.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2018.9)\n", 
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub==0.0.12->transformers) (3.7.4.3)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", "Requirement already satisfied: absl-py in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (0.12.0)\n", "Requirement already satisfied: opt-einsum in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (3.3.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (1.3.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (0.10.0)\n", "Requirement already satisfied: jaxlib>=0.1.37 in /usr/local/lib/python3.7/dist-packages (from optax->flax) (0.1.66+cuda110)\n", "Collecting chex>=0.0.4\n", "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/0f/95/ccd2da57155c019efb3a60e3e5ecb9da431e19ebb16cce1e6981d615d75e/chex-0.0.8-py3-none-any.whl (57kB)\n", "\u001b[K |████████████████████████████████| 61kB 9.6MB/s \n", "\u001b[?25hRequirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy->danlp) (3.1.1)\n", "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.7/dist-packages (from jaxlib>=0.1.37->optax->flax) (1.12)\n", "Requirement already satisfied: toolz>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.11.1)\n", "Requirement already satisfied: dm-tree>=0.1.5 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.1.6)\n", "Building wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16184 sha256=81ccf3b27c5f3a0c3ce7d2c6f1f8de09ed86269e87a025f4fb1e192824c0964a\n", " Stored in directory: /root/.cache/pip/wheels/52/df/1b/45d75646c37428f7e626214704a0e35bd3cfc32eda37e59e5f\n", "Successfully built seqeval\n", "Installing collected packages: conllu, pyconll, danlp, sacremoses, huggingface-hub, tokenizers, transformers, fsspec, xxhash, datasets, chex, optax, flax, seqeval\n", "Successfully installed chex-0.0.8 conllu-4.4 danlp-0.0.12 datasets-1.9.0 flax-0.3.4 fsspec-2021.6.1 huggingface-hub-0.0.12 optax-0.0.9 pyconll-3.1.0 sacremoses-0.0.45 seqeval-1.2.2 tokenizers-0.10.3 transformers-4.8.2 xxhash-2.0.2\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "e72bf8e7-5819-4e14-a0e1-384234089c84" }, "source": [ "from danlp.datasets import DDT\n", "from transformers import (AutoConfig, AutoTokenizer, AutoModelForTokenClassification, \n", " 
DataCollatorForTokenClassification, TrainingArguments, Trainer)\n", "from datasets import Dataset, load_metric\n", "from functools import partial\n", "import numpy as np" ], "id": "e72bf8e7-5819-4e14-a0e1-384234089c84", "execution_count": 2, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" }, "source": [ "# Evaluation of Language Models for Danish" ], "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" }, { "cell_type": "markdown", "metadata": { "id": "79792719-51b5-4c4f-a4ab-7124719b9853" }, "source": [ "This notebook is an investigation into how much, if anything, is gained from including more languages in the pretraining data of a language model. We will finetune and evaluate three models:\n", "\n", "1. `flax-community/roberta-base-danish` is a Danish RoBERTa-base model trained on the Danish part of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", "2. `flax-community/roberta-large-scandi` is a Scandinavian RoBERTa-large model, trained on the Danish, Norwegian and Swedish parts of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", "3. `xlm-roberta-base` is a multilingual RoBERTa-base model trained on over 100 languages, on a filtered subset of the Common Crawl dataset." ], "id": "79792719-51b5-4c4f-a4ab-7124719b9853" }, { "cell_type": "markdown", "metadata": { "id": "f026a443-e2bf-4f51-b934-629b277c3530" }, "source": [ "## Named Entity Recognition" ], "id": "f026a443-e2bf-4f51-b934-629b277c3530" }, { "cell_type": "markdown", "metadata": { "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" }, "source": [ "### Preparing the datasets" ], "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" }, { "cell_type": "markdown", "metadata": { "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" }, "source": [ "We start by loading the DaNE dataset for the NER task."
], "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", "outputId": "6f9bfa7e-3d7b-4230-c249-8bb74671ffc9" }, "source": [ "# Load the DaNE data\n", "train, val, test = DDT().load_as_simple_ner(predefined_splits=True)\n", "\n", "# Split docs and labels\n", "train_docs, train_labels = train\n", "val_docs, val_labels = val\n", "test_docs, test_labels = test\n", "\n", "print(f'Loaded {len(train_docs)} training samples, '\n", " f'{len(val_docs)} validation samples and '\n", " f'{len(test_docs)} test samples.')" ], "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", "execution_count": 3, "outputs": [ { "output_type": "stream", "text": [ "Downloading file /tmp/tmptw7g3c2s\n", "Loaded 4383 training samples, 564 validation samples and 565 test samples.\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" }, "source": [ "We next set up the labels in the dataset, converting them to a numeric representation." 
], "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", "outputId": "e1d0b1a6-2a92-4d1b-a72f-4facd464b9f2" }, "source": [
"# Get all unique labels in the training set. They are sorted because the\n",
"# iteration order of a set of strings can change between kernel sessions,\n",
"# which would make the label <-> id mapping differ from run to run.\n",
"unique_labels = sorted({lbl for lbl_list in train_labels for lbl in lbl_list})\n",
"\n",
"# Set up a numeric representation of the labels\n",
"label2id = {lbl: idx for idx, lbl in enumerate(unique_labels)}\n",
"id2label = {idx: lbl for idx, lbl in enumerate(unique_labels)}\n",
"\n",
"print(f'There are {len(unique_labels)} unique labels in the dataset:')\n",
"print(unique_labels)"
], "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", "execution_count": 4, "outputs": [ { "output_type": "stream", "text": [ "There are 9 unique labels in the dataset:\n", "['B-PER', 'I-PER', 'O', 'I-LOC', 'B-ORG', 'B-MISC', 'I-MISC', 'B-LOC', 'I-ORG']\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" }, "source": [ "### Setting up the models" ], "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" }, { "cell_type": "markdown", "metadata": { "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" }, "source": [ "Next, we load the tokenisers and the models that we want to compare."
], "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" }, { "cell_type": "code", "metadata": { "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b" }, "source": [
"def prepare_model(name: str) -> dict:\n",
"    '''Load the tokeniser and the token classification model called `name`.\n",
"\n",
"    The model config is set up with the global `unique_labels`, `label2id`\n",
"    and `id2label`.\n",
"\n",
"    Returns a dict with the keys 'name', 'model' and 'tokenizer'.\n",
"    '''\n",
"    label_config = AutoConfig.from_pretrained(\n",
"        name,\n",
"        num_labels=len(unique_labels),\n",
"        label2id=label2id,\n",
"        id2label=id2label,\n",
"        finetuning_task='ner',\n",
"    )\n",
"    tokenizer = AutoTokenizer.from_pretrained(\n",
"        name, use_fast=True, add_prefix_space=True\n",
"    )\n",
"\n",
"    load_model = AutoModelForTokenClassification.from_pretrained\n",
"    try:\n",
"        model = load_model(name, config=label_config)\n",
"    except OSError:\n",
"        # Loading failed with an OSError, so retry from the Flax weights\n",
"        model = load_model(name, config=label_config, from_flax=True)\n",
"\n",
"    return {'name': name, 'model': model, 'tokenizer': tokenizer}"
], "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b", "execution_count": 5, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" }, "source": [ "### Setting up tokenisation of the datasets" ], "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" }, { "cell_type": "markdown", "metadata": { "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" }, "source": [ "We start by converting the datasets into the HuggingFace format." ], "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" }, { "cell_type": "code", "metadata": { "id": "d8945416-1869-424f-b146-0c5848611305" }, "source": [
"# Wrap every split in a Dataset with the columns `docs` and `orig_labels`\n",
"train_dataset = Dataset.from_dict({'docs': train_docs, 'orig_labels': train_labels})\n",
"val_dataset = Dataset.from_dict({'docs': val_docs, 'orig_labels': val_labels})\n",
"test_dataset = Dataset.from_dict({'docs': test_docs, 'orig_labels': test_labels})"
], "id": "d8945416-1869-424f-b146-0c5848611305", "execution_count": 6, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" }, "source": [ "Next, we define a function which tokenises the dataset as well as aligning it with the labels in the dataset."
], "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" }, { "cell_type": "code", "metadata": { "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3" }, "source": [
"def tokenize_and_align_labels(examples: dict,\n",
"                              tokenizer,\n",
"                              label_all_tokens: bool = False) -> dict:\n",
"    '''Tokenize a batch of documents and align the word labels with the tokens.\n",
"\n",
"    Args:\n",
"        examples: Batch with the keys 'docs' (one list of words per document)\n",
"            and 'orig_labels' (one list of labels per document, indexed by\n",
"            word position).\n",
"        tokenizer: Tokeniser used to encode the words. Its output must\n",
"            support `word_ids`.\n",
"        label_all_tokens: If True, every token of a word gets the label of\n",
"            that word. If False (the default), only the first token of each\n",
"            word is labelled and the remaining ones are set to -100.\n",
"\n",
"    Returns:\n",
"        The tokenised batch with an extra 'labels' entry holding the label\n",
"        ids, where -100 marks positions to be ignored.\n",
"    '''\n",
"    tokenized_inputs = tokenizer(\n",
"        examples['docs'],\n",
"        # The texts in our dataset are lists of words (with a label for each word)\n",
"        is_split_into_words=True,\n",
"    )\n",
"    labels = []\n",
"    for i, label in enumerate(examples['orig_labels']):\n",
"        word_ids = tokenized_inputs.word_ids(batch_index=i)\n",
"        previous_word_idx = None\n",
"        label_ids = []\n",
"        for word_idx in word_ids:\n",
"            # Special tokens have a word id that is None. We set the label to\n",
"            # -100 so they are automatically ignored in the loss function.\n",
"            if word_idx is None:\n",
"                label_ids.append(-100)\n",
"            # The first token of each word always gets the label of the word;\n",
"            # the following tokens only do so if `label_all_tokens` is set.\n",
"            elif word_idx != previous_word_idx or label_all_tokens:\n",
"                label_ids.append(label2id[label[word_idx]])\n",
"            else:\n",
"                label_ids.append(-100)\n",
"            previous_word_idx = word_idx\n",
"\n",
"        labels.append(label_ids)\n",
"    tokenized_inputs['labels'] = labels\n",
"    return tokenized_inputs\n",
"\n",
"def tokenize_dataset(dataset: Dataset, tokenizer) -> Dataset:\n",
"    '''Tokenize `dataset` in batches and add the aligned `labels` column'''\n",
"    return dataset.map(partial(tokenize_and_align_labels, tokenizer=tokenizer),\n",
"                       batched=True,\n",
"                       num_proc=4,\n",
"                       desc='Running tokenizer on dataset')"
], "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3", "execution_count": 7, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "e508cfec-a830-416d-901e-0a6b5ce67598" }, "source": [ "Just to see that it worked, let's have a look at a tokenized dataset."
], "id": "e508cfec-a830-416d-901e-0a6b5ce67598" }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 450, "referenced_widgets": [ "b3ac84dcf48f4ba8a65aecd1df5a1b68", "490a5358971a45fa92989776dc6757c3", "f5be7a9c6f1941659c806255a9315b7c", "9eb5033009ee441eb164d862b4b2c39c", "ed07265cff0c4c82b95e0d0c47359edb", "46a87c4067114a31ae6e34c3a9464f76", "04a42f0ccb10413494e891907fc547d9", "2188756da7e74badb48a459ea15c02d5", "a1db2167447249ed95dfb78a97c24bf9", "68ce97d8904e4b398af538a7ad1ed1ea", "245bfeb4fb5542f9900cebd0e3cccc74", "501c0ab625ed4f0bb9954de0b97e90f1", "f8140d5334e14d7988c17bbbae05b08e", "0b6cce6d1a0e4de48ae7eb967dfeda87", "bac6501d76ce4b5ba56ab1effcedccbb", "cf17e5af2e7e407e9d96e0325ad733de", "d603ce6680d3425e8c145e77bc0e0e30", "bbd848e83f30482dab926e53a7188f37", "29c28419ac2848ca8ce6f73eea0e3425", "ac01f9efc2ec4eb2af6dfd956467ab8e", "82c474271a584a46b9af812bd9947ff7", "5dea54f5878c4ea1b91aea2d6c01dcc9", "1f5a386436f142999c9cd61a5567167f", "4d13f40b545245d88a9ce9cfa738a59c", "bacd59325efe4e7a8289e2e77eca3f97", "b6493940baf04dd6a461abd3d123d20d", "1b5c2a40effd4df8a0e29f7589e4ce69", "516c9f479605404886582c4af2d4860d", "7d5cce86ee7c4016a6cff03887038660", "36aa47fe9e22473ea71a0b6a4d740b35", "b98d5bb115ce4458919bc628a7c453cc", "6437a7817c244f54b4ca06d78d6aeff7", "3ac65aaeae574af5b7eea30b4a873ec2", "8d99822c6a514136aa164846d039bbfc", "105f722c571f47b2b7a7184c9fe45c18", "4a6ab6cdeb0943ba8cf8486caadf2f8d", "4caf7f4e912d454b8ecf3f3971ede95e", "36873c0c51904573809bb3eadd172383", "15d31d2a49804328b4c80fa98dae8ff1", "7fa7cbad599741edbf0c099bf2668494", "f19e664d094f46f0a94d7410685d67eb", "74626cdc554848fab75607aed0324aa3", "f2bcfb2929594116b9367a18a1778aa2", "709d06d2c8614f9b97bf86ce8ed1118f", "65ca9ae8108d46a1999573c3267f16bc", "520e4029bca14128a6eb4e2bbd4c78ed", "b8e8db36327e4706b6ab435a698cb3fd", "7d948bdb11c0427b8c49c48e7c5d9772" ] }, "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", "outputId": 
"3c1e9613-7224-464b-e43f-90f568364b4b" }, "source": [
"tokenizer = AutoTokenizer.from_pretrained('flax-community/roberta-base-danish',\n",
"                                          use_fast=True,\n",
"                                          add_prefix_space=True)\n",
"tokenized_train = tokenize_dataset(train_dataset, tokenizer)\n",
"first_sample = tokenized_train[0]\n",
"\n",
"print('Sample document:')\n",
"print(list(zip(first_sample['docs'], first_sample['orig_labels'])))\n",
"print()\n",
"print('Tokenized document:')\n",
"# Pair every token with its own label *before* dropping the ignored (-100)\n",
"# positions. Filtering only the labels shifts them onto the wrong tokens as\n",
"# soon as a word is split into several tokens.\n",
"print([(tokenizer.decode(tok).strip(), id2label[lbl])\n",
"       for tok, lbl in zip(first_sample['input_ids'], first_sample['labels'])\n",
"       if lbl != -100])"
], "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", "execution_count": 8, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3ac84dcf48f4ba8a65aecd1df5a1b68", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=618.0, style=ProgressStyle(description_…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a1db2167447249ed95dfb78a97c24bf9", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1388356.0, style=ProgressStyle(descript…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d603ce6680d3425e8c145e77bc0e0e30", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bacd59325efe4e7a8289e2e77eca3f97",
"version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3ac65aaeae574af5b7eea30b4a873ec2", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f19e664d094f46f0a94d7410685d67eb", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n", "\n", "\n", "Sample document:\n", "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID-huset', 'B-LOC'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'O'), ('formanden', 'O'), ('Kjeld', 'B-PER'), ('Christensen', 'I-PER'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O'), ('tressere', 'O'), ('.', 'O')]\n", "\n", "Tokenized document:\n", "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID', 'B-LOC'), ('-', 'O'), ('huset', 'O'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'B-PER'), ('formanden', 'I-PER'), ('Kjeld', 'O'), ('Christensen', 'O'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O')]\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "5be05dc0-94c2-41e2-ad36-464c846e034e" }, "source": [ "### Finetuning the models" ], "id": 
"5be05dc0-41e2-ad36-464c846e034e" }, { "cell_type": "markdown", "metadata": { "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" }, "source": [ "We now set up the actual finetuning of the models. We will be employing the `Trainer` class from the `transformers` library, and the following `compute_metrics` helper function is used during training to compute the metrics that we are interested in." ], "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" }, { "cell_type": "code", "metadata": { "id": "75aadb73-a073-48bb-b808-a3f228556db2", "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "1de82500b0d34e5c9f6c5f995f27ea03", "c7e9741c804e421898f9c45cde1ce7cd", "5178228787094bfba3b671af67e3df0f", "3aa19fa9dda74118844e11a876039a0b", "2afa7d80cc004014b4a672bc0b683fce", "b2757df8c2e14c7d90befd479221e5c5", "9ada8d8e30114064be38f3b5d4645f36", "a391191463d0455c8fd4f83b8ae69c8f" ] }, "outputId": "b470d1a4-d845-424a-e2c9-16d87b52a1f9" }, "source": [
"# Initialise metric\n",
"metric = load_metric(\"seqeval\")\n",
"\n",
"def compute_metrics(p):\n",
"    '''Compute precision, recall, F1 and accuracy for a (predictions, labels) pair.\n",
"\n",
"    The predictions are reduced with an argmax over their last axis, and every\n",
"    position whose label is -100 is left out of the evaluation.\n",
"    '''\n",
"    scores, gold_ids = p\n",
"    predicted_ids = np.argmax(scores, axis=-1)\n",
"\n",
"    true_predictions, true_labels = [], []\n",
"    for predicted_row, gold_row in zip(predicted_ids, gold_ids):\n",
"        # Keep only the positions that carry a real label\n",
"        kept = [(pred, gold)\n",
"                for pred, gold in zip(predicted_row, gold_row)\n",
"                if gold != -100]\n",
"        true_predictions.append([id2label[pred] for pred, _ in kept])\n",
"        true_labels.append([id2label[gold] for _, gold in kept])\n",
"\n",
"    results = metric.compute(predictions=true_predictions, references=true_labels)\n",
"    return {'precision': results['overall_precision'],\n",
"            'recall': results['overall_recall'],\n",
"            'f1': results['overall_f1'],\n",
"            'accuracy': results['overall_accuracy']}"
], "id": "75aadb73-a073-48bb-b808-a3f228556db2", "execution_count": 9, "outputs": [ { "output_type": "display_data", "data": {
"application/vnd.jupyter.widget-view+json": { "model_id": "1de82500b0d34e5c9f6c5f995f27ea03", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2482.0, style=ProgressStyle(description…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "cdc47b49-6227-498a-835f-caa38e5e7796" }, "source": [ "The following script then tokenises the dataset using the specified tokeniser, and starts finetuning on the DaNE dataset." ], "id": "cdc47b49-6227-498a-835f-caa38e5e7796" }, { "cell_type": "code", "metadata": { "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f" }, "source": [
"def finetune(model_name: str,\n",
"             epochs: int = 10,\n",
"             lr: float = 5e-5,\n",
"             batch_size: int = 32,\n",
"             save: bool = True):\n",
"    '''Finetune a transformer model for NER on the DaNE dataset.\n",
"\n",
"    Training, validation and test metrics are logged and saved through the\n",
"    `Trainer`.\n",
"\n",
"    Args:\n",
"        model_name: Name of the pretrained model, passed on to `prepare_model`.\n",
"        epochs: Number of training epochs.\n",
"        lr: Learning rate.\n",
"        batch_size: Batch size per device, for both training and evaluation.\n",
"        save: Whether to save a checkpoint after every epoch.\n",
"    '''\n",
"\n",
"    # Fetch the model and tokenizer\n",
"    model_dict = prepare_model(model_name)\n",
"    tokenizer = model_dict['tokenizer']\n",
"\n",
"    # Tokenize the datasets\n",
"    tokenized_train = tokenize_dataset(train_dataset, tokenizer)\n",
"    tokenized_val = tokenize_dataset(val_dataset, tokenizer)\n",
"    tokenized_test = tokenize_dataset(test_dataset, tokenizer)\n",
"\n",
"    # Initialise the data collator\n",
"    data_collator = DataCollatorForTokenClassification(tokenizer)\n",
"\n",
"    # Initialise training arguments\n",
"    training_args = TrainingArguments(output_dir=f'../models/{model_dict[\"name\"]}-ner-dane',\n",
"                                      evaluation_strategy='epoch',\n",
"                                      logging_strategy='epoch',\n",
"                                      save_strategy='epoch' if save else 'no',\n",
"                                      per_device_train_batch_size=batch_size,\n",
"                                      per_device_eval_batch_size=batch_size,\n",
"                                      gradient_accumulation_steps=1,\n",
"                                      learning_rate=lr,\n",
"                                      num_train_epochs=epochs,\n",
"                                      warmup_steps=50,\n",
"                                      report_to='all',\n",
"                                      # Reloading the best model needs saved\n",
"                                      # checkpoints, and the save strategy must\n",
"                                      # match the evaluation strategy for it\n",
"                                      load_best_model_at_end=save)\n",
"\n",
"    # Initialise Trainer\n",
"    trainer = Trainer(model=model_dict['model'],\n",
"                      args=training_args,\n",
"                      train_dataset=tokenized_train,\n",
"                      eval_dataset=tokenized_val,\n",
"                      tokenizer=tokenizer,\n",
"                      data_collator=data_collator,\n",
"                      compute_metrics=compute_metrics)\n",
"\n",
"    # Finetune the model\n",
"    train_result = trainer.train()\n",
"\n",
"    # Log training metrics and save the state\n",
"    metrics = train_result.metrics\n",
"    trainer.log_metrics('train', metrics)\n",
"    trainer.save_metrics('train', metrics)\n",
"    trainer.save_state()\n",
"\n",
"    # Log validation metrics\n",
"    metrics = trainer.evaluate()\n",
"    trainer.log_metrics('eval', metrics)\n",
"    trainer.save_metrics('eval', metrics)\n",
"\n",
"    # Log test metrics. The *tokenised* test set has to be used here, as the\n",
"    # raw `test_dataset` only holds the `docs` and `orig_labels` columns.\n",
"    test_output = trainer.predict(tokenized_test, metric_key_prefix='predict')\n",
"    trainer.log_metrics('test', test_output.metrics)\n",
"    trainer.save_metrics('test', test_output.metrics)"
], "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f", "execution_count": 10, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "mm-FMWWblx1O" }, "source": [
"model_names = dict(danish='flax-community/roberta-base-danish',\n",
"                   scandi='flax-community/roberta-large-scandi',#'Maltehb/roberta-base-scandinavian',\n",
"                   multi='xlm-roberta-base',\n",
"                   multilarge='xlm-roberta-large',\n",
"                   botxo='Maltehb/danish-bert-botxo')"
], "id": "mm-FMWWblx1O", "execution_count": 15, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "47aef8e786ea416e8fa99869a46d008f", "eb587b328a7e4a88bffad029b4943a1e", "bf71a524605b4a63b33468e43b212b62", "e9a69e05e82e48feb3f203e8ac7b7afa", "f3fa0e5bd0b34ffe92627a1615447d79", "03b37083ef7642f2942dd97b6e090c33", "18d6f6e2b90747258a2082796f2eeda9", "e33495db5c3343a999c9c6d807abc238", "ceb189e178b24e03b398b7ba37e63a02", "7b85ed3a5f034cbcbb3d87d9e5ac807d", "81f7f3a6c8dc4c9e8320fbcc8b4aa270",
"f1187ab3d4a1491dacbf7f109810bf3c", "43d1cd650b26471cbf07beb1733b943a", "71410f8bf2d348ea9f3f173ea70c7829", "f8ae53cbfe38498095d7f7e2ee6b1b89", "dd961e0fdc604f0883ac6a674617b7ac", "e9a04e6cb6094f00b75a248aebc11dcf", "3a118dd7adf3459db52afc86e39e4681", "59dae515b4804e3f8a64372ec0cd5254", "a835eb4e43174c178a43fc20cf878f67", "bbd988fc15734daf834d7c1be0898f54", "59a87b9b20fd43e2ad4df96508e2a4e8", "0c06e7199e2440ff8df24c7f4d3816bf", "1721231f136a41a89226a6993e770708", "ea1a49df91de49b39d7c28620d396ed3", "f43ced84b4ae4d0290406ff29ac319b6", "249eab1d100f4e66891404bd0293cfac", "de5f4a79619b4361adb69ebf75f480d3", "38baac16961b491d99a195ec08b7146c", "31550c6938b245bdaff271f77e36a265", "86d213b1b67649e28127ca8919d1b508", "3833a9687f8440c8bc7159be7c5da6aa", "0eb974fa255b4b378c19580c9b443242", "72b317f6253640efa451b41609f418cc", "9dbc6bdd1fcd4258b27b0b36f537a52b", "48eab5fd5f5e4f4f9b1517a3f2e53052", "562f962ecfe1438ba3bd67406c3bc415", "e5c84b81d24a4120ad364cf7a8d92b13", "12ba4202432a4d9da9b4295bee72ffac", "4b846d71afd943278c1ac0cecfa13f65", "c23a9b479ca54b5082ced5e0359f54d9", "ce19ae483e1c4a9b90b3fecfea1330e4", "e953e23fd1364232b6cd39ec11ccaecb", "75b304f6ca5945b295a64fafcf283c4e", "cb932f442d0c4c8b959e455cc0ce1329", "5fa39708c9684ce3b93577f1a0197bf4", "4e7a38f355d84811a66936be929d270d", "50499dbed5cb470a9b5dd7f4e5144405", "1d3024c3e0374116bb856902590f6bfd", "1a406ffa02dc479aa0fcdfa41afcbb0b", "9f6792ca58ed4906bb671ae049e2b860", "54ef5c70c3f146e1a1fbd82f8aeedbb9", "ed8c1a4ff520407eb35402fb4e5fcc71", "68819af7768e486da1752ff27d2e4104", "4039c305be024488abecf4a635345c46", "80622e53e4124d0cbd506542f81f4818", "2cad6e60bb414f46b79720dfc4f87f9e", "fc6b7d2794bb44cdabafdfba208b8dc3", "f8fd5a767ff3404ebbadc6cc63110445", "8b638f038ef04ea7b2ad3bdcb90f0141", "305833c39a074d0296ef4511b6f61e7e", "f31cad3887d640d8b4322598eca1ca70", "02ecdd0d12614752a19b294395cb821c", "4c62a1fd18a54327a1f50bf3ca087027", "9daad6aa599343278ca5198897b390b3", "b803d1d51df9466db862f96fb8e49257", 
"266b34a7ee284ac590ffa999f1351330", "93a03570b7bf4f3f8494f7b5677a7c70", "543ff4b83d804f52afbcb7a57ca22570", "468d17728336411db088c69330f4de68", "fe17854a2dbe4949aeb4efb309cab6fc", "a1feb0e41e2f47e28f550ed81b61e952", "ca9eb397fcd640b48838e58579b475bc", "4234b3e6815040af90d298f6ab2a808e", "30739849e0db45daa664d8992ab6f458", "24dc9ed72f8048fe8b174f2f1ef6e2ca", "641876e099e84c44854262d5e709409e", "f8ab57bd0aba4f608a8f7549baa4ead4", "75b2a86adb434e8cb3358a3461be99e6", "e005269e95704fae8ff9ebe8f932d023", "0217f8a6209948cb9334eeaf5c489b1b", "8589613c58db44bc959fc43c003dc689", "168ee56a9f65422ca6c503ea29e8a825", "b5f3ea592806493abe87f29fbe8059fb", "a53357f0785046afbbfcc408a5aa0f19", "3c4b3cd5af584ad584da81f19a26eec0", "9097e5ba3c00454c823871d38393907b", "5d0f563e826c4ef79ff1ed6cc7ab1c01", "9de41b790b2c4df88309b8e6e66972cc", "26274dbb3a324f3ca7a2a91ef24adb83", "783f5d6b893d4f2e968c249605773c9e", "1a597fd54ab04a4cb80147e93abc72b8", "f317aee64c0f43a08c8157413d7956e1", "1f37c1324fba4488a0d19cc1d623f887", "584ec18135294eb9a916c85385eb4bea", "8e67b29112a04c48a978cf648904ee61", "ece5245bfb5c4b02aa1e2527974f1cc6", "ab4e0611c3d7428b9ffa703f794852a7", "0df9c4b4ed0240f2871906f72e9e9839", "5731d37f432b4713b9b8be897b1f6686", "fa9d6f6c384241f192660b2668956ad4", "c04cd9397ecf4de69d5e7fc39311629c", "669706d500bc404ca65e16f939db02c5", "fc71353cc3b5434ba3c32d5c64069757" ] }, "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", "outputId": "5c9bf179-dded-42cd-9af3-ad7aba2dcee4" }, "source": [ "finetune(model_names['multilarge'], epochs=25, lr=5e-5, batch_size=32, save=False)" ], "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ "404 Client Error: Not Found for url: https://huggingface.co/flax-community/roberta-large-scandi/resolve/main/pytorch_model.bin\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "47aef8e786ea416e8fa99869a46d008f", "version_major": 
2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=498796983.0, style=ProgressStyle(descri…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n", " pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n", "Some weights of the Flax model were not used when initializing the PyTorch model RobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']\n", "- This IS expected if you are initializing RobertaForTokenClassification from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForTokenClassification from a Flax model that you expect to be exactly identical (e.g. 
initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).\n", "Some weights of RobertaForTokenClassification were not initialized from the Flax model and are newly initialized: ['classifier.weight', 'classifier.bias', 'roberta.embeddings.position_ids']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ceb189e178b24e03b398b7ba37e63a02", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e9a04e6cb6094f00b75a248aebc11dcf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ea1a49df91de49b39d7c28620d396ed3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0eb974fa255b4b378c19580c9b443242", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" ] }, "metadata": { "tags": [] 
} }, { "output_type": "stream", "text": [ "\n", "\n", "\n", "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c23a9b479ca54b5082ced5e0359f54d9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1d3024c3e0374116bb856902590f6bfd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2cad6e60bb414f46b79720dfc4f87f9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " \n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9daad6aa599343278ca5198897b390b3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca9eb397fcd640b48838e58579b475bc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" ] }, 
"metadata": { "tags": [] } }, { "output_type": "stream", "text": [ " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0217f8a6209948cb9334eeaf5c489b1b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", " \n", " " ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9de41b790b2c4df88309b8e6e66972cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ece5245bfb5c4b02aa1e2527974f1cc6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n", "\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: docs, orig_labels.\n", "***** Running training *****\n", " Num examples = 4383\n", " Num Epochs = 25\n", " Instantaneous batch size per device = 32\n", " Total train batch size (w. parallel, distributed & accumulation) = 32\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 3425\n" ], "name": "stderr" }, { "output_type": "display_data", "data": { "text/html": [ "\n", "
Epoch | \n", "Training Loss | \n", "Validation Loss | \n", "Precision | \n", "Recall | \n", "F1 | \n", "Accuracy | \n", "
---|---|---|---|---|---|---|
1 | \n", "0.476500 | \n", "0.181083 | \n", "0.408898 | \n", "0.402083 | \n", "0.405462 | \n", "0.956252 | \n", "
2 | \n", "0.138400 | \n", "0.076793 | \n", "0.689720 | \n", "0.768750 | \n", "0.727094 | \n", "0.979965 | \n", "
3 | \n", "0.075900 | \n", "0.061824 | \n", "0.748062 | \n", "0.804167 | \n", "0.775100 | \n", "0.983062 | \n", "
4 | \n", "0.050200 | \n", "0.058904 | \n", "0.773694 | \n", "0.833333 | \n", "0.802407 | \n", "0.985192 | \n", "
5 | \n", "0.034500 | \n", "0.055440 | \n", "0.814961 | \n", "0.862500 | \n", "0.838057 | \n", "0.986837 | \n", "
6 | \n", "0.025200 | \n", "0.056832 | \n", "0.803502 | \n", "0.860417 | \n", "0.830986 | \n", "0.986643 | \n", "
7 | \n", "0.018300 | \n", "0.058509 | \n", "0.804000 | \n", "0.837500 | \n", "0.820408 | \n", "0.986643 | \n", "
"
],
"text/plain": [
" "
],
"text/plain": [
"\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" Epoch \n",
" Training Loss \n",
" Validation Loss \n",
" Precision \n",
" Recall \n",
" F1 \n",
" Accuracy \n",
" \n",
" \n",
" 1 \n",
" 0.476500 \n",
" 0.181083 \n",
" 0.408898 \n",
" 0.402083 \n",
" 0.405462 \n",
" 0.956252 \n",
" \n",
" \n",
" 2 \n",
" 0.138400 \n",
" 0.076793 \n",
" 0.689720 \n",
" 0.768750 \n",
" 0.727094 \n",
" 0.979965 \n",
" \n",
" \n",
" 3 \n",
" 0.075900 \n",
" 0.061824 \n",
" 0.748062 \n",
" 0.804167 \n",
" 0.775100 \n",
" 0.983062 \n",
" \n",
" \n",
" 4 \n",
" 0.050200 \n",
" 0.058904 \n",
" 0.773694 \n",
" 0.833333 \n",
" 0.802407 \n",
" 0.985192 \n",
" \n",
" \n",
" 5 \n",
" 0.034500 \n",
" 0.055440 \n",
" 0.814961 \n",
" 0.862500 \n",
" 0.838057 \n",
" 0.986837 \n",
" \n",
" \n",
" 6 \n",
" 0.025200 \n",
" 0.056832 \n",
" 0.803502 \n",
" 0.860417 \n",
" 0.830986 \n",
" 0.986643 \n",
" \n",
" \n",
" 7 \n",
" 0.018300 \n",
" 0.058509 \n",
" 0.804000 \n",
" 0.837500 \n",
" 0.820408 \n",
" 0.986643 \n",
" \n",
" \n",
" 8 \n",
" 0.013300 \n",
" 0.063613 \n",
" 0.832347 \n",
" 0.879167 \n",
" 0.855117 \n",
" 0.988289 \n",
" \n",
" \n",
" 9 \n",
" 0.011200 \n",
" 0.065774 \n",
" 0.818182 \n",
" 0.881250 \n",
" 0.848546 \n",
" 0.987224 \n",
" \n",
" \n",
" 10 \n",
" 0.008500 \n",
" 0.062434 \n",
" 0.853119 \n",
" 0.883333 \n",
" 0.867963 \n",
" 0.989063 \n",
" \n",
" \n",
" 11 \n",
" 0.007300 \n",
" 0.064465 \n",
" 0.836292 \n",
" 0.883333 \n",
" 0.859169 \n",
" 0.988966 \n",
" \n",
" \n",
" 12 \n",
" 0.005400 \n",
" 0.066295 \n",
" 0.854291 \n",
" 0.891667 \n",
" 0.872579 \n",
" 0.989160 \n",
" \n",
" \n",
" 13 \n",
" 0.004500 \n",
" 0.067713 \n",
" 0.850895 \n",
" 0.891667 \n",
" 0.870804 \n",
" 0.989644 \n",
" \n",
" \n",
" 14 \n",
" 0.004100 \n",
" 0.068105 \n",
" 0.854000 \n",
" 0.889583 \n",
" 0.871429 \n",
" 0.989160 \n",
" \n",
" \n",
" 15 \n",
" 0.003400 \n",
" 0.069819 \n",
" 0.864372 \n",
" 0.889583 \n",
" 0.876797 \n",
" 0.989837 \n",
" \n",
" \n",
" 16 \n",
" 0.002700 \n",
" 0.074552 \n",
" 0.856275 \n",
" 0.881250 \n",
" 0.868583 \n",
" 0.989063 \n",
" \n",
" \n",
" 17 \n",
" 0.002500 \n",
" 0.074190 \n",
" 0.873727 \n",
" 0.893750 \n",
" 0.883625 \n",
" 0.989741 \n",
" \n",
" \n",
" 18 \n",
" 0.001800 \n",
" 0.074841 \n",
" 0.860000 \n",
" 0.895833 \n",
" 0.877551 \n",
" 0.988870 \n",
" \n",
" \n",
" 19 \n",
" 0.001700 \n",
" 0.074929 \n",
" 0.870707 \n",
" 0.897917 \n",
" 0.884103 \n",
" 0.989741 \n",
" \n",
" \n",
" 20 \n",
" 0.001800 \n",
" 0.078682 \n",
" 0.855422 \n",
" 0.887500 \n",
" 0.871166 \n",
" 0.989160 \n",
" \n",
" \n",
" \n",
"21 \n",
" 0.001600 \n",
" 0.076686 \n",
" 0.866935 \n",
" 0.895833 \n",
" 0.881148 \n",
" 0.989741 \n",
"