{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "gpt-j-6b bias+norm fit", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard", "widgets": { "application/vnd.jupyter.widget-state+json": { "fb16c2d89a604ac9aa02fcb3d74adb09": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_072401c372ac419f820d85b4be1a0030", "IPY_MODEL_2cbe225630824a3aa86246f4a8f8da20", "IPY_MODEL_a1f5872891014769ac0fc3eb7d92b299", "IPY_MODEL_31fbb95e53ff4b0f8143c7e6f0b6ce0a" ], "layout": "IPY_MODEL_807ed67fcda14b69b93ccab3dc80739d" } }, "072401c372ac419f820d85b4be1a0030": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f75bcf9462354eb3980e8f833d52e098", "placeholder": "", "style": "IPY_MODEL_c779b66f63514b6ba0761623c5b463b4", "value": "
Step | \n", "Training Loss | \n", "
---|
"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to gpt-j-6b-finetune/checkpoint-147\n",
"Configuration saved in gpt-j-6b-finetune/checkpoint-147/config.json\n",
"Model weights saved in gpt-j-6b-finetune/checkpoint-147/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=147, training_loss=1.665000240818984, metrics={'train_runtime': 2828.7347, 'train_samples_per_second': 0.417, 'train_steps_per_second': 0.052, 'total_flos': 1555992281088.0, 'train_loss': 1.665000240818984, 'epoch': 1.0})"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"model.push_to_hub(checkpoint_name)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 787,
"referenced_widgets": [
"06f743565b9c4c7f9c7141c88f7ef059",
"aaaf08a4ada941b994cf5e80ceae7b52",
"a0634d0761f14a5692372507914d87dc",
"2f6c3d24340d45b3a3d32298e3c901b7",
"dae3847d9a934f50a023138201626434",
"f232c746f99345f79947ec73a8b1974c",
"d292df43a4a84d76aab51d1c065d5760",
"46f1773f3dc94d609fff7493e5bf3e36",
"4fbead058bb94bebafedcac695e101dc",
"e00b996f586c4956b573c17f21c1dc93",
"c26f6de037dd4237b1594e901420aaff",
"fb28a1a3e54a43319b6b148ebd05ad5a",
"769b93b3e9bd4250a17d6961963ccb60",
"02c0f3fa6c1d4792adc4be1e5cc82739",
"335771ffcdfa48da84704683b3974639",
"b4a02f2d7c3b495093f64269fbbe5784",
"55db12e51f1a48ac9b19231dd65f497f",
"b58ac777248548aaa2fadd45b7e489ed",
"34c4451ad60a4555ab8dd671fc025fec",
"071388627b0343e5ba5c8da2cba33983",
"17985769a3424ad49fd6579e25c432d9",
"cf9c2106c4cb4419a7c5f8a0faf3984a"
]
},
"id": "8WCFZ0TqWN9b",
"outputId": "e7da57ce-9da5-41a9-b353-edd400dc0591"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Configuration saved in gpt-j-6b-finetune/config.json\n",
"Model weights saved in gpt-j-6b-finetune/pytorch_model.bin\n",
"Several commits (2) will be pushed upstream.\n",
"The progress bars may be unreliable.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file pytorch_model.bin: 0%| | 1.00/5.75G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "06f743565b9c4c7f9c7141c88f7ef059"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file runs/Jul13_16-45-33_20ea8f07eae3/events.out.tfevents.1657730740.20ea8f07eae3.72.0: 0%| …"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "fb28a1a3e54a43319b6b148ebd05ad5a"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"remote: error: cannot lock ref 'refs/heads/main': is at 73569b76fa2ba1cab3a2606d65b6f9617d2c073e but expected 0a2b6faed30fb6fb7672c5ddcc6a5c2c8f36b935 \n",
"To https://huggingface.co/crumb/gpt-j-6b-finetune\n",
" ! [remote rejected] main -> main (failed to update ref)\n",
"error: failed to push some refs to 'https://user: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@huggingface.co/crumb/gpt-j-6b-finetune'\n",
"\n"
]
},
{
"output_type": "error",
"ename": "OSError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/huggingface_hub/repository.py\u001b[0m in \u001b[0;36mgit_push\u001b[0;34m(self, upstream, blocking, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1160\u001b[0m raise subprocess.CalledProcessError(\n\u001b[0;32m-> 1161\u001b[0;31m \u001b[0mreturn_code\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstdout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1162\u001b[0m )\n",
"\u001b[0;31mCalledProcessError\u001b[0m: Command '['git', 'push', '--set-upstream', 'origin', 'main']' returned non-zero exit status 1.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m