diff --git "a/notebooks/evaluation.ipynb" "b/notebooks/evaluation.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/evaluation.ipynb" @@ -0,0 +1,6752 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "colab": { + "name": "evaluation.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "b3ac84dcf48f4ba8a65aecd1df5a1b68": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_490a5358971a45fa92989776dc6757c3", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f5be7a9c6f1941659c806255a9315b7c", + "IPY_MODEL_9eb5033009ee441eb164d862b4b2c39c" + ] + } + }, + "490a5358971a45fa92989776dc6757c3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + 
"grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f5be7a9c6f1941659c806255a9315b7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_ed07265cff0c4c82b95e0d0c47359edb", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 618, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 618, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_46a87c4067114a31ae6e34c3a9464f76" + } + }, + "9eb5033009ee441eb164d862b4b2c39c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_04a42f0ccb10413494e891907fc547d9", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 618/618 [00:01<00:00, 464B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_2188756da7e74badb48a459ea15c02d5" + } + }, + "ed07265cff0c4c82b95e0d0c47359edb": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "46a87c4067114a31ae6e34c3a9464f76": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "04a42f0ccb10413494e891907fc547d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": 
"@jupyter-widgets/controls" + } + }, + "2188756da7e74badb48a459ea15c02d5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a1db2167447249ed95dfb78a97c24bf9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_68ce97d8904e4b398af538a7ad1ed1ea", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_245bfeb4fb5542f9900cebd0e3cccc74", + "IPY_MODEL_501c0ab625ed4f0bb9954de0b97e90f1" + ] + } + }, + "68ce97d8904e4b398af538a7ad1ed1ea": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": 
"LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "245bfeb4fb5542f9900cebd0e3cccc74": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_f8140d5334e14d7988c17bbbae05b08e", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1388356, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1388356, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0b6cce6d1a0e4de48ae7eb967dfeda87" + } + }, + "501c0ab625ed4f0bb9954de0b97e90f1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_bac6501d76ce4b5ba56ab1effcedccbb", 
+ "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.39M/1.39M [00:00<00:00, 5.74MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_cf17e5af2e7e407e9d96e0325ad733de" + } + }, + "f8140d5334e14d7988c17bbbae05b08e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "0b6cce6d1a0e4de48ae7eb967dfeda87": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "bac6501d76ce4b5ba56ab1effcedccbb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "cf17e5af2e7e407e9d96e0325ad733de": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d603ce6680d3425e8c145e77bc0e0e30": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_bbd848e83f30482dab926e53a7188f37", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_29c28419ac2848ca8ce6f73eea0e3425", + "IPY_MODEL_ac01f9efc2ec4eb2af6dfd956467ab8e" + ] + } + }, + "bbd848e83f30482dab926e53a7188f37": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "29c28419ac2848ca8ce6f73eea0e3425": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_82c474271a584a46b9af812bd9947ff7", + "_dom_classes": [], + "description": "Running tokenizer on dataset #0: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5dea54f5878c4ea1b91aea2d6c01dcc9" + } + }, + "ac01f9efc2ec4eb2af6dfd956467ab8e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_1f5a386436f142999c9cd61a5567167f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:01<00:00, 1.82ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4d13f40b545245d88a9ce9cfa738a59c" + } + }, + "82c474271a584a46b9af812bd9947ff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "5dea54f5878c4ea1b91aea2d6c01dcc9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + 
"grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1f5a386436f142999c9cd61a5567167f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4d13f40b545245d88a9ce9cfa738a59c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + 
"grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "bacd59325efe4e7a8289e2e77eca3f97": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_b6493940baf04dd6a461abd3d123d20d", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_1b5c2a40effd4df8a0e29f7589e4ce69", + "IPY_MODEL_516c9f479605404886582c4af2d4860d" + ] + } + }, + "b6493940baf04dd6a461abd3d123d20d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + 
"grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1b5c2a40effd4df8a0e29f7589e4ce69": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_7d5cce86ee7c4016a6cff03887038660", + "_dom_classes": [], + "description": "Running tokenizer on dataset #1: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_36aa47fe9e22473ea71a0b6a4d740b35" + } + }, + "516c9f479605404886582c4af2d4860d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_b98d5bb115ce4458919bc628a7c453cc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:01<00:00, 1.96ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_6437a7817c244f54b4ca06d78d6aeff7" + } + }, + "7d5cce86ee7c4016a6cff03887038660": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "36aa47fe9e22473ea71a0b6a4d740b35": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b98d5bb115ce4458919bc628a7c453cc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "6437a7817c244f54b4ca06d78d6aeff7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "3ac65aaeae574af5b7eea30b4a873ec2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_8d99822c6a514136aa164846d039bbfc", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_105f722c571f47b2b7a7184c9fe45c18", + "IPY_MODEL_4a6ab6cdeb0943ba8cf8486caadf2f8d" + ] + } + }, + "8d99822c6a514136aa164846d039bbfc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, 
+ "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "105f722c571f47b2b7a7184c9fe45c18": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_4caf7f4e912d454b8ecf3f3971ede95e", + "_dom_classes": [], + "description": "Running tokenizer on dataset #2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_36873c0c51904573809bb3eadd172383" + } + }, + "4a6ab6cdeb0943ba8cf8486caadf2f8d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_15d31d2a49804328b4c80fa98dae8ff1", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:00<00:00, 2.03ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7fa7cbad599741edbf0c099bf2668494" + } + }, + "4caf7f4e912d454b8ecf3f3971ede95e": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "36873c0c51904573809bb3eadd172383": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "15d31d2a49804328b4c80fa98dae8ff1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + 
"_model_module": "@jupyter-widgets/controls" + } + }, + "7fa7cbad599741edbf0c099bf2668494": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f19e664d094f46f0a94d7410685d67eb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_74626cdc554848fab75607aed0324aa3", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f2bcfb2929594116b9367a18a1778aa2", + "IPY_MODEL_709d06d2c8614f9b97bf86ce8ed1118f" + ] + } + }, + "74626cdc554848fab75607aed0324aa3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + 
"_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f2bcfb2929594116b9367a18a1778aa2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_65ca9ae8108d46a1999573c3267f16bc", + "_dom_classes": [], + "description": "Running tokenizer on dataset #3: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_520e4029bca14128a6eb4e2bbd4c78ed" + } + }, + "709d06d2c8614f9b97bf86ce8ed1118f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": 
"IPY_MODEL_b8e8db36327e4706b6ab435a698cb3fd", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:00<00:00, 2.03ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7d948bdb11c0427b8c49c48e7c5d9772" + } + }, + "65ca9ae8108d46a1999573c3267f16bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "520e4029bca14128a6eb4e2bbd4c78ed": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + 
"object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b8e8db36327e4706b6ab435a698cb3fd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "7d948bdb11c0427b8c49c48e7c5d9772": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1de82500b0d34e5c9f6c5f995f27ea03": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_c7e9741c804e421898f9c45cde1ce7cd", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_5178228787094bfba3b671af67e3df0f", + "IPY_MODEL_3aa19fa9dda74118844e11a876039a0b" + ] + } + }, + "c7e9741c804e421898f9c45cde1ce7cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "5178228787094bfba3b671af67e3df0f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_2afa7d80cc004014b4a672bc0b683fce", + "_dom_classes": [], + "description": "Downloading: ", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2482, + "_view_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2482, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b2757df8c2e14c7d90befd479221e5c5" + } + }, + "3aa19fa9dda74118844e11a876039a0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_9ada8d8e30114064be38f3b5d4645f36", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 6.34k/? [00:27<00:00, 228B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a391191463d0455c8fd4f83b8ae69c8f" + } + }, + "2afa7d80cc004014b4a672bc0b683fce": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "b2757df8c2e14c7d90befd479221e5c5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + 
"overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9ada8d8e30114064be38f3b5d4645f36": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "a391191463d0455c8fd4f83b8ae69c8f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + 
"grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "47aef8e786ea416e8fa99869a46d008f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_eb587b328a7e4a88bffad029b4943a1e", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_bf71a524605b4a63b33468e43b212b62", + "IPY_MODEL_e9a69e05e82e48feb3f203e8ac7b7afa" + ] + } + }, + "eb587b328a7e4a88bffad029b4943a1e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "bf71a524605b4a63b33468e43b212b62": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_f3fa0e5bd0b34ffe92627a1615447d79", + "_dom_classes": [], + "description": "Downloading: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 498796983, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 498796983, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_03b37083ef7642f2942dd97b6e090c33" + } + }, + "e9a69e05e82e48feb3f203e8ac7b7afa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_18d6f6e2b90747258a2082796f2eeda9", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 499M/499M [05:39<00:00, 1.47MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e33495db5c3343a999c9c6d807abc238" + } + }, + "f3fa0e5bd0b34ffe92627a1615447d79": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "03b37083ef7642f2942dd97b6e090c33": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "18d6f6e2b90747258a2082796f2eeda9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "e33495db5c3343a999c9c6d807abc238": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "ceb189e178b24e03b398b7ba37e63a02": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_7b85ed3a5f034cbcbb3d87d9e5ac807d", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_81f7f3a6c8dc4c9e8320fbcc8b4aa270", + "IPY_MODEL_f1187ab3d4a1491dacbf7f109810bf3c" + ] + } + }, + "7b85ed3a5f034cbcbb3d87d9e5ac807d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, 
+ "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "81f7f3a6c8dc4c9e8320fbcc8b4aa270": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_43d1cd650b26471cbf07beb1733b943a", + "_dom_classes": [], + "description": "Running tokenizer on dataset #0: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_71410f8bf2d348ea9f3f173ea70c7829" + } + }, + "f1187ab3d4a1491dacbf7f109810bf3c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_f8ae53cbfe38498095d7f7e2ee6b1b89", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:01<00:00, 1.99ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_dd961e0fdc604f0883ac6a674617b7ac" + } + }, + "43d1cd650b26471cbf07beb1733b943a": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "71410f8bf2d348ea9f3f173ea70c7829": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f8ae53cbfe38498095d7f7e2ee6b1b89": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + 
"_model_module": "@jupyter-widgets/controls" + } + }, + "dd961e0fdc604f0883ac6a674617b7ac": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "e9a04e6cb6094f00b75a248aebc11dcf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_3a118dd7adf3459db52afc86e39e4681", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_59dae515b4804e3f8a64372ec0cd5254", + "IPY_MODEL_a835eb4e43174c178a43fc20cf878f67" + ] + } + }, + "3a118dd7adf3459db52afc86e39e4681": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + 
"_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "59dae515b4804e3f8a64372ec0cd5254": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_bbd988fc15734daf834d7c1be0898f54", + "_dom_classes": [], + "description": "Running tokenizer on dataset #1: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_59a87b9b20fd43e2ad4df96508e2a4e8" + } + }, + "a835eb4e43174c178a43fc20cf878f67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": 
"IPY_MODEL_0c06e7199e2440ff8df24c7f4d3816bf", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:00<00:00, 2.01ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1721231f136a41a89226a6993e770708" + } + }, + "bbd988fc15734daf834d7c1be0898f54": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "59a87b9b20fd43e2ad4df96508e2a4e8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + 
"object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0c06e7199e2440ff8df24c7f4d3816bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "1721231f136a41a89226a6993e770708": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "ea1a49df91de49b39d7c28620d396ed3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_f43ced84b4ae4d0290406ff29ac319b6", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_249eab1d100f4e66891404bd0293cfac", + "IPY_MODEL_de5f4a79619b4361adb69ebf75f480d3" + ] + } + }, + "f43ced84b4ae4d0290406ff29ac319b6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "249eab1d100f4e66891404bd0293cfac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_38baac16961b491d99a195ec08b7146c", + "_dom_classes": [], + "description": "Running tokenizer on dataset #2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_31550c6938b245bdaff271f77e36a265" + } + }, + "de5f4a79619b4361adb69ebf75f480d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_86d213b1b67649e28127ca8919d1b508", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:00<00:00, 2.09ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3833a9687f8440c8bc7159be7c5da6aa" + } + }, + "38baac16961b491d99a195ec08b7146c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "31550c6938b245bdaff271f77e36a265": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + 
"overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "86d213b1b67649e28127ca8919d1b508": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "3833a9687f8440c8bc7159be7c5da6aa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + 
"grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0eb974fa255b4b378c19580c9b443242": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_72b317f6253640efa451b41609f418cc", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_9dbc6bdd1fcd4258b27b0b36f537a52b", + "IPY_MODEL_48eab5fd5f5e4f4f9b1517a3f2e53052" + ] + } + }, + "72b317f6253640efa451b41609f418cc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9dbc6bdd1fcd4258b27b0b36f537a52b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_562f962ecfe1438ba3bd67406c3bc415", + "_dom_classes": [], + "description": "Running tokenizer on dataset #3: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e5c84b81d24a4120ad364cf7a8d92b13" + } + }, + "48eab5fd5f5e4f4f9b1517a3f2e53052": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_12ba4202432a4d9da9b4295bee72ffac", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2/2 [00:00<00:00, 2.55ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4b846d71afd943278c1ac0cecfa13f65" + } + }, + "562f962ecfe1438ba3bd67406c3bc415": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "e5c84b81d24a4120ad364cf7a8d92b13": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "12ba4202432a4d9da9b4295bee72ffac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4b846d71afd943278c1ac0cecfa13f65": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "c23a9b479ca54b5082ced5e0359f54d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_ce19ae483e1c4a9b90b3fecfea1330e4", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_e953e23fd1364232b6cd39ec11ccaecb", + "IPY_MODEL_75b304f6ca5945b295a64fafcf283c4e" + ] + } + }, + "ce19ae483e1c4a9b90b3fecfea1330e4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, 
+ "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "e953e23fd1364232b6cd39ec11ccaecb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_cb932f442d0c4c8b959e455cc0ce1329", + "_dom_classes": [], + "description": "Running tokenizer on dataset #0: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5fa39708c9684ce3b93577f1a0197bf4" + } + }, + "75b304f6ca5945b295a64fafcf283c4e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_4e7a38f355d84811a66936be929d270d", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 4.83ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_50499dbed5cb470a9b5dd7f4e5144405" + } + }, + "cb932f442d0c4c8b959e455cc0ce1329": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "5fa39708c9684ce3b93577f1a0197bf4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4e7a38f355d84811a66936be929d270d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + 
"_model_module": "@jupyter-widgets/controls" + } + }, + "50499dbed5cb470a9b5dd7f4e5144405": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1d3024c3e0374116bb856902590f6bfd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_1a406ffa02dc479aa0fcdfa41afcbb0b", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_9f6792ca58ed4906bb671ae049e2b860", + "IPY_MODEL_54ef5c70c3f146e1a1fbd82f8aeedbb9" + ] + } + }, + "1a406ffa02dc479aa0fcdfa41afcbb0b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + 
"_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9f6792ca58ed4906bb671ae049e2b860": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_ed8c1a4ff520407eb35402fb4e5fcc71", + "_dom_classes": [], + "description": "Running tokenizer on dataset #1: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_68819af7768e486da1752ff27d2e4104" + } + }, + "54ef5c70c3f146e1a1fbd82f8aeedbb9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": 
"IPY_MODEL_4039c305be024488abecf4a635345c46", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 2.59ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_80622e53e4124d0cbd506542f81f4818" + } + }, + "ed8c1a4ff520407eb35402fb4e5fcc71": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "68819af7768e486da1752ff27d2e4104": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + 
"object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4039c305be024488abecf4a635345c46": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "80622e53e4124d0cbd506542f81f4818": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2cad6e60bb414f46b79720dfc4f87f9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_fc6b7d2794bb44cdabafdfba208b8dc3", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_f8fd5a767ff3404ebbadc6cc63110445", + "IPY_MODEL_8b638f038ef04ea7b2ad3bdcb90f0141" + ] + } + }, + "fc6b7d2794bb44cdabafdfba208b8dc3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f8fd5a767ff3404ebbadc6cc63110445": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_305833c39a074d0296ef4511b6f61e7e", + "_dom_classes": [], + "description": "Running tokenizer on dataset #2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f31cad3887d640d8b4322598eca1ca70" + } + }, + "8b638f038ef04ea7b2ad3bdcb90f0141": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_02ecdd0d12614752a19b294395cb821c", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 3.82ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4c62a1fd18a54327a1f50bf3ca087027" + } + }, + "305833c39a074d0296ef4511b6f61e7e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "f31cad3887d640d8b4322598eca1ca70": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + 
"overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "02ecdd0d12614752a19b294395cb821c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4c62a1fd18a54327a1f50bf3ca087027": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + 
"grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9daad6aa599343278ca5198897b390b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_b803d1d51df9466db862f96fb8e49257", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_266b34a7ee284ac590ffa999f1351330", + "IPY_MODEL_93a03570b7bf4f3f8494f7b5677a7c70" + ] + } + }, + "b803d1d51df9466db862f96fb8e49257": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "266b34a7ee284ac590ffa999f1351330": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_543ff4b83d804f52afbcb7a57ca22570", + "_dom_classes": [], + "description": "Running tokenizer on dataset #3: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_468d17728336411db088c69330f4de68" + } + }, + "93a03570b7bf4f3f8494f7b5677a7c70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_fe17854a2dbe4949aeb4efb309cab6fc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 3.93ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a1feb0e41e2f47e28f550ed81b61e952" + } + }, + "543ff4b83d804f52afbcb7a57ca22570": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "468d17728336411db088c69330f4de68": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "fe17854a2dbe4949aeb4efb309cab6fc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "a1feb0e41e2f47e28f550ed81b61e952": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "ca9eb397fcd640b48838e58579b475bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_4234b3e6815040af90d298f6ab2a808e", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_30739849e0db45daa664d8992ab6f458", + "IPY_MODEL_24dc9ed72f8048fe8b174f2f1ef6e2ca" + ] + } + }, + "4234b3e6815040af90d298f6ab2a808e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, 
+ "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "30739849e0db45daa664d8992ab6f458": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_641876e099e84c44854262d5e709409e", + "_dom_classes": [], + "description": "Running tokenizer on dataset #0: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_f8ab57bd0aba4f608a8f7549baa4ead4" + } + }, + "24dc9ed72f8048fe8b174f2f1ef6e2ca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_75b2a86adb434e8cb3358a3461be99e6", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 2.70ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e005269e95704fae8ff9ebe8f932d023" + } + }, + "641876e099e84c44854262d5e709409e": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "f8ab57bd0aba4f608a8f7549baa4ead4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "75b2a86adb434e8cb3358a3461be99e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + 
"_model_module": "@jupyter-widgets/controls" + } + }, + "e005269e95704fae8ff9ebe8f932d023": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0217f8a6209948cb9334eeaf5c489b1b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_8589613c58db44bc959fc43c003dc689", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_168ee56a9f65422ca6c503ea29e8a825", + "IPY_MODEL_b5f3ea592806493abe87f29fbe8059fb" + ] + } + }, + "8589613c58db44bc959fc43c003dc689": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + 
"_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "168ee56a9f65422ca6c503ea29e8a825": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_a53357f0785046afbbfcc408a5aa0f19", + "_dom_classes": [], + "description": "Running tokenizer on dataset #1: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3c4b3cd5af584ad584da81f19a26eec0" + } + }, + "b5f3ea592806493abe87f29fbe8059fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": 
"IPY_MODEL_9097e5ba3c00454c823871d38393907b", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 3.40ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5d0f563e826c4ef79ff1ed6cc7ab1c01" + } + }, + "a53357f0785046afbbfcc408a5aa0f19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "3c4b3cd5af584ad584da81f19a26eec0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + 
"object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9097e5ba3c00454c823871d38393907b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "5d0f563e826c4ef79ff1ed6cc7ab1c01": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9de41b790b2c4df88309b8e6e66972cc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + 
"_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_26274dbb3a324f3ca7a2a91ef24adb83", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_783f5d6b893d4f2e968c249605773c9e", + "IPY_MODEL_1a597fd54ab04a4cb80147e93abc72b8" + ] + } + }, + "26274dbb3a324f3ca7a2a91ef24adb83": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "783f5d6b893d4f2e968c249605773c9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_f317aee64c0f43a08c8157413d7956e1", + "_dom_classes": [], + "description": "Running tokenizer on dataset #2: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1f37c1324fba4488a0d19cc1d623f887" + } + }, + "1a597fd54ab04a4cb80147e93abc72b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_584ec18135294eb9a916c85385eb4bea", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 4.69ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_8e67b29112a04c48a978cf648904ee61" + } + }, + "f317aee64c0f43a08c8157413d7956e1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "1f37c1324fba4488a0d19cc1d623f887": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + 
"overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "584ec18135294eb9a916c85385eb4bea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "8e67b29112a04c48a978cf648904ee61": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + 
"grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "ece5245bfb5c4b02aa1e2527974f1cc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_ab4e0611c3d7428b9ffa703f794852a7", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_0df9c4b4ed0240f2871906f72e9e9839", + "IPY_MODEL_5731d37f432b4713b9b8be897b1f6686" + ] + } + }, + "ab4e0611c3d7428b9ffa703f794852a7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": 
null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0df9c4b4ed0240f2871906f72e9e9839": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_fa9d6f6c384241f192660b2668956ad4", + "_dom_classes": [], + "description": "Running tokenizer on dataset #3: 100%", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_c04cd9397ecf4de69d5e7fc39311629c" + } + }, + "5731d37f432b4713b9b8be897b1f6686": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_669706d500bc404ca65e16f939db02c5", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 5.11ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_fc71353cc3b5434ba3c32d5c64069757" + } + }, + "fa9d6f6c384241f192660b2668956ad4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "c04cd9397ecf4de69d5e7fc39311629c": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "669706d500bc404ca65e16f939db02c5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "fc71353cc3b5434ba3c32d5c64069757": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + 
"align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QnKI9cdUFw23", + "outputId": "82086023-d380-4e1a-f11a-b0047bb10190" + }, + "source": [ + "!pip install danlp transformers datasets numpy flax seqeval" + ], + "id": "QnKI9cdUFw23", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting danlp\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/62/6b/3a245c069f0a5376e565d67c2f9fb04a39e4d7e94c93c2d27e57c7bf9012/danlp-0.0.12-py3-none-any.whl (71kB)\n", + "\r\u001b[K |████▋ | 10kB 16.0MB/s eta 0:00:01\r\u001b[K |█████████▏ | 20kB 22.1MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 30kB 25.0MB/s eta 0:00:01\r\u001b[K |██████████████████▍ | 40kB 27.0MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 51kB 29.0MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 61kB 29.9MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 71kB 9.4MB/s \n", + "\u001b[?25hCollecting transformers\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/fd/1a/41c644c963249fd7f3836d926afa1e3f1cc234a1c40d80c5f03ad8f6f1b2/transformers-4.8.2-py3-none-any.whl 
(2.5MB)\n", + "\u001b[K |████████████████████████████████| 2.5MB 33.3MB/s \n", + "\u001b[?25hCollecting datasets\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/86/27/9c91ddee87b06d2de12f134c5171a49890427e398389f07f6463485723c3/datasets-1.9.0-py3-none-any.whl (262kB)\n", + "\u001b[K |████████████████████████████████| 266kB 42.9MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.19.5)\n", + "Collecting flax\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f6/21/21ca1f4831ac24646578d2545c4db9a8369b9da4a4b7dcf067feee312b45/flax-0.3.4-py3-none-any.whl (183kB)\n", + "\u001b[K |████████████████████████████████| 184kB 49.2MB/s \n", + "\u001b[?25hCollecting seqeval\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)\n", + "\u001b[K |████████████████████████████████| 51kB 4.7MB/s \n", + "\u001b[?25hRequirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (from danlp) (3.10.0)\n", + "Collecting conllu\n", + " Downloading https://files.pythonhosted.org/packages/ae/be/be6959c3ff2dbfdd87de4be0ccdff577835b5d08b1d25bf7fd4aaf0d7add/conllu-4.4-py2.py3-none-any.whl\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from danlp) (1.1.5)\n", + "Collecting pyconll\n", + " Downloading https://files.pythonhosted.org/packages/0a/4c/edf12b4b211f8a0f7f85a52ed4b50cd453ac96e9b751427e0296eb7ae42a/pyconll-3.1.0-py3-none-any.whl\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from danlp) (4.41.1)\n", + "Collecting sacremoses\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)\n", + "\u001b[K |████████████████████████████████| 901kB 46.1MB/s \n", + 
"\u001b[?25hCollecting huggingface-hub==0.0.12\n", + " Downloading https://files.pythonhosted.org/packages/2f/ee/97e253668fda9b17e968b3f97b2f8e53aa0127e8807d24a547687423fe0b/huggingface_hub-0.0.12-py3-none-any.whl\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from transformers) (3.13)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (4.6.0)\n", + "Collecting tokenizers<0.11,>=0.10.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)\n", + "\u001b[K |████████████████████████████████| 3.3MB 36.7MB/s \n", + "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", + "Collecting fsspec>=2021.05.0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0e/3a/666e63625a19883ae8e1674099e631f9737bd5478c4790e5ad49c5ac5261/fsspec-2021.6.1-py3-none-any.whl (115kB)\n", + "\u001b[K |████████████████████████████████| 122kB 52.7MB/s \n", + "\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) 
(0.70.12.2)\n", + "Collecting xxhash\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/4f/0a862cad26aa2ed7a7cd87178cbbfa824fc1383e472d63596a0d018374e7/xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243kB)\n", + "\u001b[K |████████████████████████████████| 245kB 49.9MB/s \n", + "\u001b[?25hRequirement already satisfied: msgpack in /usr/local/lib/python3.7/dist-packages (from flax) (1.0.2)\n", + "Requirement already satisfied: jax>=0.2.13 in /usr/local/lib/python3.7/dist-packages (from flax) (0.2.13)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from flax) (3.2.2)\n", + "Collecting optax\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/07/48/4f65dbb5ec096917ec039ba2c7eccf97ee05a4157e0e965a45ed3b7a13f9/optax-0.0.9-py3-none-any.whl (118kB)\n", + "\u001b[K |████████████████████████████████| 122kB 53.9MB/s \n", + "\u001b[?25hRequirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n", + "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.15.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy->danlp) (1.3.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->danlp) (2018.9)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub==0.0.12->transformers) (3.7.4.3)\n", + "Requirement already 
satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (0.12.0)\n", + "Requirement already satisfied: opt-einsum in /usr/local/lib/python3.7/dist-packages (from jax>=0.2.13->flax) (3.3.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->flax) (0.10.0)\n", + "Requirement already satisfied: jaxlib>=0.1.37 in /usr/local/lib/python3.7/dist-packages (from optax->flax) (0.1.66+cuda110)\n", + "Collecting chex>=0.0.4\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0f/95/ccd2da57155c019efb3a60e3e5ecb9da431e19ebb16cce1e6981d615d75e/chex-0.0.8-py3-none-any.whl (57kB)\n", + "\u001b[K |████████████████████████████████| 61kB 9.6MB/s \n", + "\u001b[?25hRequirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from 
requests-oauthlib>=0.7.0->tweepy->danlp) (3.1.1)\n", + "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.7/dist-packages (from jaxlib>=0.1.37->optax->flax) (1.12)\n", + "Requirement already satisfied: toolz>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.11.1)\n", + "Requirement already satisfied: dm-tree>=0.1.5 in /usr/local/lib/python3.7/dist-packages (from chex>=0.0.4->optax->flax) (0.1.6)\n", + "Building wheels for collected packages: seqeval\n", + " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16184 sha256=81ccf3b27c5f3a0c3ce7d2c6f1f8de09ed86269e87a025f4fb1e192824c0964a\n", + " Stored in directory: /root/.cache/pip/wheels/52/df/1b/45d75646c37428f7e626214704a0e35bd3cfc32eda37e59e5f\n", + "Successfully built seqeval\n", + "Installing collected packages: conllu, pyconll, danlp, sacremoses, huggingface-hub, tokenizers, transformers, fsspec, xxhash, datasets, chex, optax, flax, seqeval\n", + "Successfully installed chex-0.0.8 conllu-4.4 danlp-0.0.12 datasets-1.9.0 flax-0.3.4 fsspec-2021.6.1 huggingface-hub-0.0.12 optax-0.0.9 pyconll-3.1.0 sacremoses-0.0.45 seqeval-1.2.2 tokenizers-0.10.3 transformers-4.8.2 xxhash-2.0.2\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "e72bf8e7-5819-4e14-a0e1-384234089c84" + }, + "source": [ + "from danlp.datasets import DDT\n", + "from transformers import (AutoConfig, AutoTokenizer, AutoModelForTokenClassification, \n", + " DataCollatorForTokenClassification, TrainingArguments, Trainer)\n", + "from datasets import Dataset, load_metric\n", + "from functools import partial\n", + "import numpy as np" + ], + "id": "e72bf8e7-5819-4e14-a0e1-384234089c84", + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" + }, + "source": [ + "# Evaluation of Language 
Models for Danish" + ], + "id": "7bc1ab27-c2bb-42fc-94d7-a94e4d1dc4e4" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "79792719-51b5-4c4f-a4ab-7124719b9853" + }, + "source": [ + "This notebook is an investigation into how much, if anything, is gained from including more languages in the training set of a language model at pretraining. We will finetune and evaluate three models:\n", + "\n", + "1. `flax-community/roberta-base-danish` is a Danish RoBERTa-base model trained on the Danish part of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", + "2. `flax-community/roberta-large-scandi` is a Scandinavian RoBERTa-base model, trained on the Danish, Norwegian and Swedish parts of the [mC4](https://github.com/allenai/allennlp/discussions/5265) dataset;\n", + "3. `xlm-roberta-base` is a multilingual RoBERTa-base model trained on over 100 languages, on a filtered subset of the Common Crawl dataset." + ], + "id": "79792719-51b5-4c4f-a4ab-7124719b9853" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f026a443-e2bf-4f51-b934-629b277c3530" + }, + "source": [ + "## Named Entity Recognition" + ], + "id": "f026a443-e2bf-4f51-b934-629b277c3530" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" + }, + "source": [ + "### Preparing the datasets" + ], + "id": "7aee4cb1-28d3-40da-a939-00e55ad5ce2c" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" + }, + "source": [ + "We start by loading the DaNE dataset for the NER task." 
+ ], + "id": "5c463704-5c8b-4e88-9fca-db7000b70aed" + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", + "outputId": "6f9bfa7e-3d7b-4230-c249-8bb74671ffc9" + }, + "source": [ + "# Load the DaNE data\n", + "train, val, test = DDT().load_as_simple_ner(predefined_splits=True)\n", + "\n", + "# Split docs and labels\n", + "train_docs, train_labels = train\n", + "val_docs, val_labels = val\n", + "test_docs, test_labels = test\n", + "\n", + "print(f'Loaded {len(train_docs)} training samples, '\n", + " f'{len(val_docs)} validation samples and '\n", + " f'{len(test_docs)} test samples.')" + ], + "id": "8daf7629-311d-4fba-916b-b9c7f6debfa4", + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading file /tmp/tmptw7g3c2s\n", + "Loaded 4383 training samples, 564 validation samples and 565 test samples.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" + }, + "source": [ + "We next set up the labels in the dataset, converting them to a numeric representation." 
+ ], + "id": "3bc2922d-3c32-4e96-ba08-07e0c56a387f" + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", + "outputId": "e1d0b1a6-2a92-4d1b-a72f-4facd464b9f2" + }, + "source": [ + "# Get the set of all unique labels in the dataset\n", + "unique_labels = list({lbl for lbl_list in train_labels for lbl in lbl_list})\n", + "\n", + "# Set up a numeric representation of the labels\n", + "label2id = {unique_labels[id]: id for id in range(len(unique_labels))}\n", + "id2label = {id: unique_labels[id] for id in range(len(unique_labels))}\n", + "\n", + "print(f'There are {len(unique_labels)} unique labels in the dataset:')\n", + "print(unique_labels)" + ], + "id": "e0082911-f46f-45d3-ac53-aa59296fabc0", + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "There are 9 unique labels in the dataset:\n", + "['B-PER', 'I-PER', 'O', 'I-LOC', 'B-ORG', 'B-MISC', 'I-MISC', 'B-LOC', 'I-ORG']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" + }, + "source": [ + "### Setting up the models" + ], + "id": "a4dd43e2-accf-403e-a39d-51d68dd9c5de" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" + }, + "source": [ + "Next, we load the tokenisers and the models that we want to compare." 
+ ], + "id": "b9b9024f-f981-4a08-8dcf-ce73c4d77a35" + }, + { + "cell_type": "code", + "metadata": { + "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b" + }, + "source": [ + "def prepare_model(name: str) -> dict: \n", + " config = AutoConfig.from_pretrained(name, \n", + " num_labels=len(unique_labels),\n", + " label2id=label2id,\n", + " id2label=id2label,\n", + " finetuning_task='ner')\n", + " \n", + " tokenizer = AutoTokenizer.from_pretrained(name, \n", + " use_fast=True,\n", + " add_prefix_space=True)\n", + " \n", + " try:\n", + " model = AutoModelForTokenClassification.from_pretrained(name,\n", + " config=config)\n", + " except OSError:\n", + " model = AutoModelForTokenClassification.from_pretrained(name,\n", + " config=config,\n", + " from_flax=True)\n", + " \n", + " return dict(name=name, model=model, tokenizer=tokenizer)" + ], + "id": "713a522b-511c-4dd3-9948-66e3bf8cb40b", + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" + }, + "source": [ + "### Setting up tokenisation of the datasets" + ], + "id": "d84b8b8a-4413-4778-b26e-ab6bf76c0798" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" + }, + "source": [ + "We start by converting the datasets into the HuggingFace format." 
+ ], + "id": "143a3ba2-fdac-44fb-8130-0524866a01a0" + }, + { + "cell_type": "code", + "metadata": { + "id": "d8945416-1869-424f-b146-0c5848611305" + }, + "source": [ + "train_dataset = Dataset.from_dict(dict(docs=train_docs, orig_labels=train_labels))\n", + "val_dataset = Dataset.from_dict(dict(docs=val_docs, orig_labels=val_labels))\n", + "test_dataset = Dataset.from_dict(dict(docs=test_docs, orig_labels=test_labels))" + ], + "id": "d8945416-1869-424f-b146-0c5848611305", + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" + }, + "source": [ + "Next, we define a function which tokenises the dataset and aligns the resulting tokens with the labels in the dataset." + ], + "id": "c8f7a029-f01e-4159-a317-47c9eba7fbfa" + }, + { + "cell_type": "code", + "metadata": { + "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3" + }, + "source": [ + "def tokenize_and_align_labels(examples: dict, tokenizer) -> dict:\n", + " '''Tokenize all texts and align the labels with them'''\n", + " tokenized_inputs = tokenizer(\n", + " examples['docs'],\n", + " # We use this argument because the texts in our dataset are lists of words (with a label for each word).\n", + " is_split_into_words=True,\n", + " )\n", + " labels = []\n", + " for i, label in enumerate(examples['orig_labels']):\n", + " word_ids = tokenized_inputs.word_ids(batch_index=i)\n", + " previous_word_idx = None\n", + " label_ids = []\n", + " for word_idx in word_ids:\n", + " # Special tokens have a word id that is None. 
We set the label to -100 so they are automatically\n", + " # ignored in the loss function.\n", + " if word_idx is None:\n", + " label_ids.append(-100)\n", + " # We set the label for the first token of each word.\n", + " elif word_idx != previous_word_idx:\n", + " label_ids.append(label2id[label[word_idx]])\n", + " # For the other tokens in a word, we set the label to -100, so that only the first token of each\n", + " # word carries a label.\n", + " else:\n", + " label_ids.append(-100)#label2id[label[word_idx]])\n", + " previous_word_idx = word_idx\n", + "\n", + " labels.append(label_ids)\n", + " tokenized_inputs[\"labels\"] = labels\n", + " return tokenized_inputs\n", + "\n", + "def tokenize_dataset(dataset: Dataset, tokenizer) -> Dataset:\n", + " return dataset.map(partial(tokenize_and_align_labels, tokenizer=tokenizer),\n", + " batched=True,\n", + " num_proc=4,\n", + " desc=\"Running tokenizer on dataset\")" + ], + "id": "fbf1ee90-5222-4840-9c61-43ace2e9abe3", + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e508cfec-a830-416d-901e-0a6b5ce67598" + }, + "source": [ + "Just to see that it worked, let's have a look at a tokenized dataset." 
+ ], + "id": "e508cfec-a830-416d-901e-0a6b5ce67598" + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 450, + "referenced_widgets": [ + "b3ac84dcf48f4ba8a65aecd1df5a1b68", + "490a5358971a45fa92989776dc6757c3", + "f5be7a9c6f1941659c806255a9315b7c", + "9eb5033009ee441eb164d862b4b2c39c", + "ed07265cff0c4c82b95e0d0c47359edb", + "46a87c4067114a31ae6e34c3a9464f76", + "04a42f0ccb10413494e891907fc547d9", + "2188756da7e74badb48a459ea15c02d5", + "a1db2167447249ed95dfb78a97c24bf9", + "68ce97d8904e4b398af538a7ad1ed1ea", + "245bfeb4fb5542f9900cebd0e3cccc74", + "501c0ab625ed4f0bb9954de0b97e90f1", + "f8140d5334e14d7988c17bbbae05b08e", + "0b6cce6d1a0e4de48ae7eb967dfeda87", + "bac6501d76ce4b5ba56ab1effcedccbb", + "cf17e5af2e7e407e9d96e0325ad733de", + "d603ce6680d3425e8c145e77bc0e0e30", + "bbd848e83f30482dab926e53a7188f37", + "29c28419ac2848ca8ce6f73eea0e3425", + "ac01f9efc2ec4eb2af6dfd956467ab8e", + "82c474271a584a46b9af812bd9947ff7", + "5dea54f5878c4ea1b91aea2d6c01dcc9", + "1f5a386436f142999c9cd61a5567167f", + "4d13f40b545245d88a9ce9cfa738a59c", + "bacd59325efe4e7a8289e2e77eca3f97", + "b6493940baf04dd6a461abd3d123d20d", + "1b5c2a40effd4df8a0e29f7589e4ce69", + "516c9f479605404886582c4af2d4860d", + "7d5cce86ee7c4016a6cff03887038660", + "36aa47fe9e22473ea71a0b6a4d740b35", + "b98d5bb115ce4458919bc628a7c453cc", + "6437a7817c244f54b4ca06d78d6aeff7", + "3ac65aaeae574af5b7eea30b4a873ec2", + "8d99822c6a514136aa164846d039bbfc", + "105f722c571f47b2b7a7184c9fe45c18", + "4a6ab6cdeb0943ba8cf8486caadf2f8d", + "4caf7f4e912d454b8ecf3f3971ede95e", + "36873c0c51904573809bb3eadd172383", + "15d31d2a49804328b4c80fa98dae8ff1", + "7fa7cbad599741edbf0c099bf2668494", + "f19e664d094f46f0a94d7410685d67eb", + "74626cdc554848fab75607aed0324aa3", + "f2bcfb2929594116b9367a18a1778aa2", + "709d06d2c8614f9b97bf86ce8ed1118f", + "65ca9ae8108d46a1999573c3267f16bc", + "520e4029bca14128a6eb4e2bbd4c78ed", + "b8e8db36327e4706b6ab435a698cb3fd", + 
"7d948bdb11c0427b8c49c48e7c5d9772" + ] + }, + "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", + "outputId": "3c1e9613-7224-464b-e43f-90f568364b4b" + }, + "source": [ + "tokenizer = AutoTokenizer.from_pretrained('flax-community/roberta-base-danish', \n", + " use_fast=True,\n", + " add_prefix_space=True)\n", + "tokenized_train = tokenize_dataset(train_dataset, tokenizer)\n", + "print(f'Sample document:')\n", + "print(list(zip(tokenized_train[0][\"docs\"], tokenized_train[0][\"orig_labels\"])))\n", + "print()\n", + "print(f'Tokenized document:')\n", + "print(list(zip([tokenizer.decode(tok).strip() for tok in tokenized_train[0][\"input_ids\"]], \n", + " [id2label[id] for id in tokenized_train[0][\"labels\"] if id != -100])))" + ], + "id": "4f6b4d78-f060-4b98-b022-a4182f0617c3", + "execution_count": 8, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b3ac84dcf48f4ba8a65aecd1df5a1b68", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=618.0, style=ProgressStyle(description_…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a1db2167447249ed95dfb78a97c24bf9", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1388356.0, style=ProgressStyle(descript…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d603ce6680d3425e8c145e77bc0e0e30", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, 
description='Running tokenizer on dataset #0', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bacd59325efe4e7a8289e2e77eca3f97", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3ac65aaeae574af5b7eea30b4a873ec2", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f19e664d094f46f0a94d7410685d67eb", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "Sample document:\n", + "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID-huset', 'B-LOC'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'O'), ('formanden', 'O'), ('Kjeld', 'B-PER'), ('Christensen', 'I-PER'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O'), ('tressere', 'O'), ('.', 'O')]\n", + "\n", + "Tokenized document:\n", + "[('På', 'O'), ('fredag', 'O'), ('har', 'O'), ('SID', 'B-ORG'), ('inviteret', 'O'), ('til', 'O'), ('reception', 'O'), ('i', 'O'), ('SID', 'B-LOC'), ('-', 
'O'), ('huset', 'O'), ('i', 'O'), ('anledning', 'O'), ('af', 'O'), ('at', 'B-PER'), ('formanden', 'I-PER'), ('Kjeld', 'O'), ('Christensen', 'O'), ('går', 'O'), ('ind', 'O'), ('i', 'O'), ('de', 'O'), ('glade', 'O')]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5be05dc0-94c2-41e2-ad36-464c846e034e" + }, + "source": [ + "### Finetuning the models" + ], + "id": "5be05dc0-94c2-41e2-ad36-464c846e034e" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" + }, + "source": [ + "We now set up the actual finetuning of the models. We will be employing the `Trainer` class from the `transformers` library, and the following `compute_metrics` helper function is used during training to compute the metrics that we are interested in." + ], + "id": "515c11c5-51a8-4323-847d-7ca67178f1ef" + }, + { + "cell_type": "code", + "metadata": { + "id": "75aadb73-a073-48bb-b808-a3f228556db2", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "1de82500b0d34e5c9f6c5f995f27ea03", + "c7e9741c804e421898f9c45cde1ce7cd", + "5178228787094bfba3b671af67e3df0f", + "3aa19fa9dda74118844e11a876039a0b", + "2afa7d80cc004014b4a672bc0b683fce", + "b2757df8c2e14c7d90befd479221e5c5", + "9ada8d8e30114064be38f3b5d4645f36", + "a391191463d0455c8fd4f83b8ae69c8f" + ] + }, + "outputId": "b470d1a4-d845-424a-e2c9-16d87b52a1f9" + }, + "source": [ + "# Initialise metric\n", + "metric = load_metric(\"seqeval\")\n", + "\n", + "def compute_metrics(p):\n", + " '''Helper function for computing metrics'''\n", + " predictions, labels = p\n", + " predictions = np.argmax(predictions, axis=-1)\n", + "\n", + " # Remove ignored index (special tokens)\n", + " true_predictions = [\n", + " [id2label[p] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + " true_labels = [\n", + " [id2label[l] for (p, l) in zip(prediction, label) 
if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + "\n", + " results = metric.compute(predictions=true_predictions, references=true_labels)\n", + " return dict(precision=results[\"overall_precision\"],\n", + " recall=results[\"overall_recall\"],\n", + " f1=results[\"overall_f1\"],\n", + " accuracy=results[\"overall_accuracy\"])" + ], + "id": "75aadb73-a073-48bb-b808-a3f228556db2", + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1de82500b0d34e5c9f6c5f995f27ea03", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2482.0, style=ProgressStyle(description…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cdc47b49-6227-498a-835f-caa38e5e7796" + }, + "source": [ + "The following script then tokenises the dataset using the specified tokeniser, and starts finetuning on the DaNE dataset." 
+ ], + "id": "cdc47b49-6227-498a-835f-caa38e5e7796" + }, + { + "cell_type": "code", + "metadata": { + "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f" + }, + "source": [ + "def finetune(model_name: str, \n", + " epochs: int = 10, \n", + " lr: float = 5e-5, \n", + " batch_size: int = 32,\n", + " save: bool = True):\n", + " '''Finetune a transformer model for NER on the DaNE dataset'''\n", + "\n", + " # Fetch the model and tokenizer\n", + " model_dict = prepare_model(model_name)\n", + " \n", + " # Tokenize the datasets\n", + " tokenized_train = tokenize_dataset(train_dataset, model_dict['tokenizer'])\n", + " tokenized_val = tokenize_dataset(val_dataset, model_dict['tokenizer'])\n", + " tokenized_test = tokenize_dataset(test_dataset, model_dict['tokenizer'])\n", + " \n", + " # Initialise the data collator\n", + " data_collator = DataCollatorForTokenClassification(model_dict['tokenizer'])\n", + " \n", + " # Initialise training arguments\n", + " training_args = TrainingArguments(output_dir=f'../models/{model_dict[\"name\"]}-ner-dane',\n", + " evaluation_strategy='epoch',\n", + " logging_strategy='epoch',\n", + " save_strategy='epoch' if save else 'no',\n", + " per_device_train_batch_size=batch_size,\n", + " per_device_eval_batch_size=batch_size,\n", + " gradient_accumulation_steps=1,\n", + " learning_rate=lr,\n", + " num_train_epochs=epochs,\n", + " warmup_steps=50,\n", + " report_to='all',\n", + " load_best_model_at_end=True)\n", + " \n", + " # Initialise Trainer\n", + " trainer = Trainer(model=model_dict['model'],\n", + " args=training_args,\n", + " train_dataset=tokenized_train,\n", + " eval_dataset=tokenized_val,\n", + " tokenizer=model_dict['tokenizer'],\n", + " data_collator=data_collator,\n", + " compute_metrics=compute_metrics)\n", + " \n", + " # Finetune the model\n", + " train_result = trainer.train()\n", + " \n", + " # Log training metrics and save the state\n", + " metrics = train_result.metrics\n", + " trainer.log_metrics(\"train\", metrics)\n", + " 
trainer.save_metrics(\"train\", metrics)\n", + " trainer.save_state()\n", + " \n", + " # Log validation metrics\n", + " metrics = trainer.evaluate()\n", + " trainer.log_metrics(\"eval\", metrics)\n", + " trainer.save_metrics(\"eval\", metrics)\n", + " \n", + " # Log test metrics\n", + " predictions, labels, metrics = trainer.predict(test_dataset, metric_key_prefix=\"predict\")\n", + " predictions = np.argmax(predictions, axis=-1)\n", + " trainer.log_metrics(\"test\", metrics)\n", + " trainer.save_metrics(\"test\", metrics)" + ], + "id": "e4a09abf-6230-44f4-87b0-fd630b7c502f", + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "mm-FMWWblx1O" + }, + "source": [ + "model_names = dict(danish='flax-community/roberta-base-danish',\n", + " scandi='flax-community/roberta-large-scandi',#'Maltehb/roberta-base-scandinavian',\n", + " multi='xlm-roberta-base',\n", + " multilarge='xlm-roberta-large',\n", + " botxo='Maltehb/danish-bert-botxo')" + ], + "id": "mm-FMWWblx1O", + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "47aef8e786ea416e8fa99869a46d008f", + "eb587b328a7e4a88bffad029b4943a1e", + "bf71a524605b4a63b33468e43b212b62", + "e9a69e05e82e48feb3f203e8ac7b7afa", + "f3fa0e5bd0b34ffe92627a1615447d79", + "03b37083ef7642f2942dd97b6e090c33", + "18d6f6e2b90747258a2082796f2eeda9", + "e33495db5c3343a999c9c6d807abc238", + "ceb189e178b24e03b398b7ba37e63a02", + "7b85ed3a5f034cbcbb3d87d9e5ac807d", + "81f7f3a6c8dc4c9e8320fbcc8b4aa270", + "f1187ab3d4a1491dacbf7f109810bf3c", + "43d1cd650b26471cbf07beb1733b943a", + "71410f8bf2d348ea9f3f173ea70c7829", + "f8ae53cbfe38498095d7f7e2ee6b1b89", + "dd961e0fdc604f0883ac6a674617b7ac", + "e9a04e6cb6094f00b75a248aebc11dcf", + "3a118dd7adf3459db52afc86e39e4681", + "59dae515b4804e3f8a64372ec0cd5254", + "a835eb4e43174c178a43fc20cf878f67", + 
"bbd988fc15734daf834d7c1be0898f54", + "59a87b9b20fd43e2ad4df96508e2a4e8", + "0c06e7199e2440ff8df24c7f4d3816bf", + "1721231f136a41a89226a6993e770708", + "ea1a49df91de49b39d7c28620d396ed3", + "f43ced84b4ae4d0290406ff29ac319b6", + "249eab1d100f4e66891404bd0293cfac", + "de5f4a79619b4361adb69ebf75f480d3", + "38baac16961b491d99a195ec08b7146c", + "31550c6938b245bdaff271f77e36a265", + "86d213b1b67649e28127ca8919d1b508", + "3833a9687f8440c8bc7159be7c5da6aa", + "0eb974fa255b4b378c19580c9b443242", + "72b317f6253640efa451b41609f418cc", + "9dbc6bdd1fcd4258b27b0b36f537a52b", + "48eab5fd5f5e4f4f9b1517a3f2e53052", + "562f962ecfe1438ba3bd67406c3bc415", + "e5c84b81d24a4120ad364cf7a8d92b13", + "12ba4202432a4d9da9b4295bee72ffac", + "4b846d71afd943278c1ac0cecfa13f65", + "c23a9b479ca54b5082ced5e0359f54d9", + "ce19ae483e1c4a9b90b3fecfea1330e4", + "e953e23fd1364232b6cd39ec11ccaecb", + "75b304f6ca5945b295a64fafcf283c4e", + "cb932f442d0c4c8b959e455cc0ce1329", + "5fa39708c9684ce3b93577f1a0197bf4", + "4e7a38f355d84811a66936be929d270d", + "50499dbed5cb470a9b5dd7f4e5144405", + "1d3024c3e0374116bb856902590f6bfd", + "1a406ffa02dc479aa0fcdfa41afcbb0b", + "9f6792ca58ed4906bb671ae049e2b860", + "54ef5c70c3f146e1a1fbd82f8aeedbb9", + "ed8c1a4ff520407eb35402fb4e5fcc71", + "68819af7768e486da1752ff27d2e4104", + "4039c305be024488abecf4a635345c46", + "80622e53e4124d0cbd506542f81f4818", + "2cad6e60bb414f46b79720dfc4f87f9e", + "fc6b7d2794bb44cdabafdfba208b8dc3", + "f8fd5a767ff3404ebbadc6cc63110445", + "8b638f038ef04ea7b2ad3bdcb90f0141", + "305833c39a074d0296ef4511b6f61e7e", + "f31cad3887d640d8b4322598eca1ca70", + "02ecdd0d12614752a19b294395cb821c", + "4c62a1fd18a54327a1f50bf3ca087027", + "9daad6aa599343278ca5198897b390b3", + "b803d1d51df9466db862f96fb8e49257", + "266b34a7ee284ac590ffa999f1351330", + "93a03570b7bf4f3f8494f7b5677a7c70", + "543ff4b83d804f52afbcb7a57ca22570", + "468d17728336411db088c69330f4de68", + "fe17854a2dbe4949aeb4efb309cab6fc", + "a1feb0e41e2f47e28f550ed81b61e952", + 
"ca9eb397fcd640b48838e58579b475bc", + "4234b3e6815040af90d298f6ab2a808e", + "30739849e0db45daa664d8992ab6f458", + "24dc9ed72f8048fe8b174f2f1ef6e2ca", + "641876e099e84c44854262d5e709409e", + "f8ab57bd0aba4f608a8f7549baa4ead4", + "75b2a86adb434e8cb3358a3461be99e6", + "e005269e95704fae8ff9ebe8f932d023", + "0217f8a6209948cb9334eeaf5c489b1b", + "8589613c58db44bc959fc43c003dc689", + "168ee56a9f65422ca6c503ea29e8a825", + "b5f3ea592806493abe87f29fbe8059fb", + "a53357f0785046afbbfcc408a5aa0f19", + "3c4b3cd5af584ad584da81f19a26eec0", + "9097e5ba3c00454c823871d38393907b", + "5d0f563e826c4ef79ff1ed6cc7ab1c01", + "9de41b790b2c4df88309b8e6e66972cc", + "26274dbb3a324f3ca7a2a91ef24adb83", + "783f5d6b893d4f2e968c249605773c9e", + "1a597fd54ab04a4cb80147e93abc72b8", + "f317aee64c0f43a08c8157413d7956e1", + "1f37c1324fba4488a0d19cc1d623f887", + "584ec18135294eb9a916c85385eb4bea", + "8e67b29112a04c48a978cf648904ee61", + "ece5245bfb5c4b02aa1e2527974f1cc6", + "ab4e0611c3d7428b9ffa703f794852a7", + "0df9c4b4ed0240f2871906f72e9e9839", + "5731d37f432b4713b9b8be897b1f6686", + "fa9d6f6c384241f192660b2668956ad4", + "c04cd9397ecf4de69d5e7fc39311629c", + "669706d500bc404ca65e16f939db02c5", + "fc71353cc3b5434ba3c32d5c64069757" + ] + }, + "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", + "outputId": "5c9bf179-dded-42cd-9af3-ad7aba2dcee4" + }, + "source": [ + "finetune(model_names['multilarge'], epochs=25, lr=5e-5, batch_size=32, save=False)" + ], + "id": "e863e244-a332-46e7-8bbe-fce0d0b46c57", + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "404 Client Error: Not Found for url: https://huggingface.co/flax-community/roberta-large-scandi/resolve/main/pytorch_model.bin\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "47aef8e786ea416e8fa99869a46d008f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, 
description='Downloading', max=498796983.0, style=ProgressStyle(descri…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/modeling_flax_pytorch_utils.py:201: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:180.)\n", + " pt_model_dict[flax_key] = torch.from_numpy(flax_tensor)\n", + "Some weights of the Flax model were not used when initializing the PyTorch model RobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from a Flax model that you expect to be exactly identical (e.g. 
initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).\n", + "Some weights of RobertaForTokenClassification were not initialized from the Flax model and are newly initialized: ['classifier.weight', 'classifier.bias', 'roberta.embeddings.position_ids']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ceb189e178b24e03b398b7ba37e63a02", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e9a04e6cb6094f00b75a248aebc11dcf", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ea1a49df91de49b39d7c28620d396ed3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0eb974fa255b4b378c19580c9b443242", + "version_major": 2, + "version_minor": 0 
+ }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=2.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c23a9b479ca54b5082ced5e0359f54d9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1d3024c3e0374116bb856902590f6bfd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2cad6e60bb414f46b79720dfc4f87f9e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + " \n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9daad6aa599343278ca5198897b390b3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + 
"text": [ + "\n", + "\n", + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ca9eb397fcd640b48838e58579b475bc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #0', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0217f8a6209948cb9334eeaf5c489b1b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #1', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + " \n", + " " + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9de41b790b2c4df88309b8e6e66972cc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #2', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ece5245bfb5c4b02aa1e2527974f1cc6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Running tokenizer on dataset #3', max=1.0, style=Progress…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n", + "\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and 
have been ignored: docs, orig_labels.\n", + "***** Running training *****\n", + " Num examples = 4383\n", + " Num Epochs = 25\n", + " Instantaneous batch size per device = 32\n", + " Total train batch size (w. parallel, distributed & accumulation) = 32\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 3425\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "
Epoch | \n", + "Training Loss | \n", + "Validation Loss | \n", + "Precision | \n", + "Recall | \n", + "F1 | \n", + "Accuracy | \n", + "
---|---|---|---|---|---|---|
1 | \n", + "0.476500 | \n", + "0.181083 | \n", + "0.408898 | \n", + "0.402083 | \n", + "0.405462 | \n", + "0.956252 | \n", + "
2 | \n", + "0.138400 | \n", + "0.076793 | \n", + "0.689720 | \n", + "0.768750 | \n", + "0.727094 | \n", + "0.979965 | \n", + "
3 | \n", + "0.075900 | \n", + "0.061824 | \n", + "0.748062 | \n", + "0.804167 | \n", + "0.775100 | \n", + "0.983062 | \n", + "
4 | \n", + "0.050200 | \n", + "0.058904 | \n", + "0.773694 | \n", + "0.833333 | \n", + "0.802407 | \n", + "0.985192 | \n", + "
5 | \n", + "0.034500 | \n", + "0.055440 | \n", + "0.814961 | \n", + "0.862500 | \n", + "0.838057 | \n", + "0.986837 | \n", + "
6 | \n", + "0.025200 | \n", + "0.056832 | \n", + "0.803502 | \n", + "0.860417 | \n", + "0.830986 | \n", + "0.986643 | \n", + "
7 | \n", + "0.018300 | \n", + "0.058509 | \n", + "0.804000 | \n", + "0.837500 | \n", + "0.820408 | \n", + "0.986643 | \n", + "
"
+ ],
+ "text/plain": [
+ " "
+ ],
+ "text/plain": [
+ "\n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Epoch \n",
+ " Training Loss \n",
+ " Validation Loss \n",
+ " Precision \n",
+ " Recall \n",
+ " F1 \n",
+ " Accuracy \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 0.476500 \n",
+ " 0.181083 \n",
+ " 0.408898 \n",
+ " 0.402083 \n",
+ " 0.405462 \n",
+ " 0.956252 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 0.138400 \n",
+ " 0.076793 \n",
+ " 0.689720 \n",
+ " 0.768750 \n",
+ " 0.727094 \n",
+ " 0.979965 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 0.075900 \n",
+ " 0.061824 \n",
+ " 0.748062 \n",
+ " 0.804167 \n",
+ " 0.775100 \n",
+ " 0.983062 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 0.050200 \n",
+ " 0.058904 \n",
+ " 0.773694 \n",
+ " 0.833333 \n",
+ " 0.802407 \n",
+ " 0.985192 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 0.034500 \n",
+ " 0.055440 \n",
+ " 0.814961 \n",
+ " 0.862500 \n",
+ " 0.838057 \n",
+ " 0.986837 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 0.025200 \n",
+ " 0.056832 \n",
+ " 0.803502 \n",
+ " 0.860417 \n",
+ " 0.830986 \n",
+ " 0.986643 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 0.018300 \n",
+ " 0.058509 \n",
+ " 0.804000 \n",
+ " 0.837500 \n",
+ " 0.820408 \n",
+ " 0.986643 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 0.013300 \n",
+ " 0.063613 \n",
+ " 0.832347 \n",
+ " 0.879167 \n",
+ " 0.855117 \n",
+ " 0.988289 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 0.011200 \n",
+ " 0.065774 \n",
+ " 0.818182 \n",
+ " 0.881250 \n",
+ " 0.848546 \n",
+ " 0.987224 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 0.008500 \n",
+ " 0.062434 \n",
+ " 0.853119 \n",
+ " 0.883333 \n",
+ " 0.867963 \n",
+ " 0.989063 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 0.007300 \n",
+ " 0.064465 \n",
+ " 0.836292 \n",
+ " 0.883333 \n",
+ " 0.859169 \n",
+ " 0.988966 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 0.005400 \n",
+ " 0.066295 \n",
+ " 0.854291 \n",
+ " 0.891667 \n",
+ " 0.872579 \n",
+ " 0.989160 \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 0.004500 \n",
+ " 0.067713 \n",
+ " 0.850895 \n",
+ " 0.891667 \n",
+ " 0.870804 \n",
+ " 0.989644 \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 0.004100 \n",
+ " 0.068105 \n",
+ " 0.854000 \n",
+ " 0.889583 \n",
+ " 0.871429 \n",
+ " 0.989160 \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 0.003400 \n",
+ " 0.069819 \n",
+ " 0.864372 \n",
+ " 0.889583 \n",
+ " 0.876797 \n",
+ " 0.989837 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 0.002700 \n",
+ " 0.074552 \n",
+ " 0.856275 \n",
+ " 0.881250 \n",
+ " 0.868583 \n",
+ " 0.989063 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 0.002500 \n",
+ " 0.074190 \n",
+ " 0.873727 \n",
+ " 0.893750 \n",
+ " 0.883625 \n",
+ " 0.989741 \n",
+ " \n",
+ " \n",
+ " 18 \n",
+ " 0.001800 \n",
+ " 0.074841 \n",
+ " 0.860000 \n",
+ " 0.895833 \n",
+ " 0.877551 \n",
+ " 0.988870 \n",
+ " \n",
+ " \n",
+ " 19 \n",
+ " 0.001700 \n",
+ " 0.074929 \n",
+ " 0.870707 \n",
+ " 0.897917 \n",
+ " 0.884103 \n",
+ " 0.989741 \n",
+ " \n",
+ " \n",
+ " 20 \n",
+ " 0.001800 \n",
+ " 0.078682 \n",
+ " 0.855422 \n",
+ " 0.887500 \n",
+ " 0.871166 \n",
+ " 0.989160 \n",
+ " \n",
+ " \n",
+ " \n",
+ "21 \n",
+ " 0.001600 \n",
+ " 0.076686 \n",
+ " 0.866935 \n",
+ " 0.895833 \n",
+ " 0.881148 \n",
+ " 0.989741 \n",
+ "