diff --git "a/gpt_j_6b_bias+norm_fit.ipynb" "b/gpt_j_6b_bias+norm_fit.ipynb" new file mode 100644--- /dev/null +++ "b/gpt_j_6b_bias+norm_fit.ipynb" @@ -0,0 +1,9658 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "gpt-j-6b bias+norm fit", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "gpuClass": "standard", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "fb16c2d89a604ac9aa02fcb3d74adb09": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_072401c372ac419f820d85b4be1a0030", + "IPY_MODEL_2cbe225630824a3aa86246f4a8f8da20", + "IPY_MODEL_a1f5872891014769ac0fc3eb7d92b299", + "IPY_MODEL_31fbb95e53ff4b0f8143c7e6f0b6ce0a" + ], + "layout": "IPY_MODEL_807ed67fcda14b69b93ccab3dc80739d" + } + }, + "072401c372ac419f820d85b4be1a0030": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f75bcf9462354eb3980e8f833d52e098", + "placeholder": "", + "style": "IPY_MODEL_c779b66f63514b6ba0761623c5b463b4", + "value": "
Step | \n", + "Training Loss | \n", + "
---|
"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Saving model checkpoint to gpt-j-6b-finetune/checkpoint-147\n",
+ "Configuration saved in gpt-j-6b-finetune/checkpoint-147/config.json\n",
+ "Model weights saved in gpt-j-6b-finetune/checkpoint-147/pytorch_model.bin\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=147, training_loss=1.665000240818984, metrics={'train_runtime': 2828.7347, 'train_samples_per_second': 0.417, 'train_steps_per_second': 0.052, 'total_flos': 1555992281088.0, 'train_loss': 1.665000240818984, 'epoch': 1.0})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.push_to_hub(checkpoint_name)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 787,
+ "referenced_widgets": [
+ "06f743565b9c4c7f9c7141c88f7ef059",
+ "aaaf08a4ada941b994cf5e80ceae7b52",
+ "a0634d0761f14a5692372507914d87dc",
+ "2f6c3d24340d45b3a3d32298e3c901b7",
+ "dae3847d9a934f50a023138201626434",
+ "f232c746f99345f79947ec73a8b1974c",
+ "d292df43a4a84d76aab51d1c065d5760",
+ "46f1773f3dc94d609fff7493e5bf3e36",
+ "4fbead058bb94bebafedcac695e101dc",
+ "e00b996f586c4956b573c17f21c1dc93",
+ "c26f6de037dd4237b1594e901420aaff",
+ "fb28a1a3e54a43319b6b148ebd05ad5a",
+ "769b93b3e9bd4250a17d6961963ccb60",
+ "02c0f3fa6c1d4792adc4be1e5cc82739",
+ "335771ffcdfa48da84704683b3974639",
+ "b4a02f2d7c3b495093f64269fbbe5784",
+ "55db12e51f1a48ac9b19231dd65f497f",
+ "b58ac777248548aaa2fadd45b7e489ed",
+ "34c4451ad60a4555ab8dd671fc025fec",
+ "071388627b0343e5ba5c8da2cba33983",
+ "17985769a3424ad49fd6579e25c432d9",
+ "cf9c2106c4cb4419a7c5f8a0faf3984a"
+ ]
+ },
+ "id": "8WCFZ0TqWN9b",
+ "outputId": "e7da57ce-9da5-41a9-b353-edd400dc0591"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Configuration saved in gpt-j-6b-finetune/config.json\n",
+ "Model weights saved in gpt-j-6b-finetune/pytorch_model.bin\n",
+ "Several commits (2) will be pushed upstream.\n",
+ "The progress bars may be unreliable.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Upload file pytorch_model.bin: 0%| | 1.00/5.75G [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "06f743565b9c4c7f9c7141c88f7ef059"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Upload file runs/Jul13_16-45-33_20ea8f07eae3/events.out.tfevents.1657730740.20ea8f07eae3.72.0: 0%| …"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "fb28a1a3e54a43319b6b148ebd05ad5a"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "remote: error: cannot lock ref 'refs/heads/main': is at 73569b76fa2ba1cab3a2606d65b6f9617d2c073e but expected 0a2b6faed30fb6fb7672c5ddcc6a5c2c8f36b935 \n",
+ "To https://huggingface.co/crumb/gpt-j-6b-finetune\n",
+ " ! [remote rejected] main -> main (failed to update ref)\n",
+ "error: failed to push some refs to 'https://user:***REDACTED***@huggingface.co/crumb/gpt-j-6b-finetune'\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "error",
+ "ename": "OSError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/huggingface_hub/repository.py\u001b[0m in \u001b[0;36mgit_push\u001b[0;34m(self, upstream, blocking, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1160\u001b[0m raise subprocess.CalledProcessError(\n\u001b[0;32m-> 1161\u001b[0;31m \u001b[0mreturn_code\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstdout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1162\u001b[0m )\n",
+ "\u001b[0;31mCalledProcessError\u001b[0m: Command '['git', 'push', '--set-upstream', 'origin', 'main']' returned non-zero exit status 1.",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m