vitouphy committed on
Commit
4c2d6b6
1 Parent(s): 1790b2a

Training in progress, step 1

Browse files
Files changed (2) hide show
  1. Untitled.ipynb +61 -49
  2. training_args.bin +1 -1
Untitled.ipynb CHANGED
@@ -3,7 +3,7 @@
3
  {
4
  "cell_type": "code",
5
  "execution_count": 1,
6
- "id": "15d904e9",
7
  "metadata": {
8
  "collapsed": true,
9
  "jupyter": {
@@ -708,7 +708,7 @@
708
  {
709
  "cell_type": "code",
710
  "execution_count": 2,
711
- "id": "68c81e1e",
712
  "metadata": {},
713
  "outputs": [],
714
  "source": [
@@ -721,7 +721,7 @@
721
  {
722
  "cell_type": "code",
723
  "execution_count": null,
724
- "id": "b5f4cca5",
725
  "metadata": {
726
  "collapsed": true,
727
  "jupyter": {
@@ -13484,7 +13484,7 @@
13484
  {
13485
  "cell_type": "code",
13486
  "execution_count": 3,
13487
- "id": "9b57c68d",
13488
  "metadata": {},
13489
  "outputs": [],
13490
  "source": [
@@ -13505,7 +13505,7 @@
13505
  {
13506
  "cell_type": "code",
13507
  "execution_count": 32,
13508
- "id": "6371aacf",
13509
  "metadata": {},
13510
  "outputs": [
13511
  {
@@ -13527,7 +13527,7 @@
13527
  {
13528
  "cell_type": "code",
13529
  "execution_count": 33,
13530
- "id": "214624ba",
13531
  "metadata": {},
13532
  "outputs": [],
13533
  "source": [
@@ -13542,7 +13542,7 @@
13542
  {
13543
  "cell_type": "code",
13544
  "execution_count": 6,
13545
- "id": "0c890a5b",
13546
  "metadata": {},
13547
  "outputs": [],
13548
  "source": [
@@ -13558,7 +13558,7 @@
13558
  {
13559
  "cell_type": "code",
13560
  "execution_count": 7,
13561
- "id": "9ad59bfe",
13562
  "metadata": {},
13563
  "outputs": [],
13564
  "source": [
@@ -13570,7 +13570,7 @@
13570
  {
13571
  "cell_type": "code",
13572
  "execution_count": 8,
13573
- "id": "d0b3000d",
13574
  "metadata": {},
13575
  "outputs": [
13576
  {
@@ -13592,7 +13592,7 @@
13592
  {
13593
  "cell_type": "code",
13594
  "execution_count": 9,
13595
- "id": "3067a0d2",
13596
  "metadata": {},
13597
  "outputs": [],
13598
  "source": [
@@ -13602,7 +13602,7 @@
13602
  {
13603
  "cell_type": "code",
13604
  "execution_count": 34,
13605
- "id": "be87dadf",
13606
  "metadata": {},
13607
  "outputs": [
13608
  {
@@ -13650,7 +13650,7 @@
13650
  {
13651
  "cell_type": "code",
13652
  "execution_count": 35,
13653
- "id": "42e56c1e",
13654
  "metadata": {},
13655
  "outputs": [],
13656
  "source": [
@@ -13661,7 +13661,7 @@
13661
  {
13662
  "cell_type": "code",
13663
  "execution_count": 36,
13664
- "id": "dc7ff75c",
13665
  "metadata": {},
13666
  "outputs": [
13667
  {
@@ -13679,7 +13679,7 @@
13679
  {
13680
  "cell_type": "code",
13681
  "execution_count": 37,
13682
- "id": "8132901b",
13683
  "metadata": {},
13684
  "outputs": [
13685
  {
@@ -13706,7 +13706,7 @@
13706
  {
13707
  "cell_type": "code",
13708
  "execution_count": 38,
13709
- "id": "501e426e",
13710
  "metadata": {},
13711
  "outputs": [],
13712
  "source": [
@@ -13718,7 +13718,7 @@
13718
  {
13719
  "cell_type": "code",
13720
  "execution_count": 39,
13721
- "id": "8ea339c9",
13722
  "metadata": {},
13723
  "outputs": [],
13724
  "source": [
@@ -13728,7 +13728,7 @@
13728
  {
13729
  "cell_type": "code",
13730
  "execution_count": 40,
13731
- "id": "98633ad2",
13732
  "metadata": {},
13733
  "outputs": [],
13734
  "source": [
@@ -13740,7 +13740,7 @@
13740
  {
13741
  "cell_type": "code",
13742
  "execution_count": 41,
13743
- "id": "1ec1e98a",
13744
  "metadata": {},
13745
  "outputs": [
13746
  {
@@ -13761,7 +13761,7 @@
13761
  {
13762
  "cell_type": "code",
13763
  "execution_count": 26,
13764
- "id": "9fdebef6",
13765
  "metadata": {},
13766
  "outputs": [],
13767
  "source": [
@@ -13778,7 +13778,7 @@
13778
  {
13779
  "cell_type": "code",
13780
  "execution_count": 27,
13781
- "id": "c6c32f96",
13782
  "metadata": {},
13783
  "outputs": [
13784
  {
@@ -13818,7 +13818,7 @@
13818
  {
13819
  "cell_type": "code",
13820
  "execution_count": 48,
13821
- "id": "fb722007",
13822
  "metadata": {},
13823
  "outputs": [],
13824
  "source": [
@@ -13829,7 +13829,7 @@
13829
  {
13830
  "cell_type": "code",
13831
  "execution_count": 50,
13832
- "id": "11558fd2",
13833
  "metadata": {},
13834
  "outputs": [
13835
  {
@@ -13854,7 +13854,7 @@
13854
  {
13855
  "cell_type": "code",
13856
  "execution_count": 51,
13857
- "id": "12c81313",
13858
  "metadata": {},
13859
  "outputs": [
13860
  {
@@ -13901,7 +13901,7 @@
13901
  {
13902
  "cell_type": "code",
13903
  "execution_count": 52,
13904
- "id": "1cb97a7e",
13905
  "metadata": {},
13906
  "outputs": [],
13907
  "source": [
@@ -13923,7 +13923,7 @@
13923
  {
13924
  "cell_type": "code",
13925
  "execution_count": 53,
13926
- "id": "d7bd14b2",
13927
  "metadata": {},
13928
  "outputs": [],
13929
  "source": [
@@ -13934,7 +13934,7 @@
13934
  {
13935
  "cell_type": "code",
13936
  "execution_count": 41,
13937
- "id": "00c75e3c",
13938
  "metadata": {},
13939
  "outputs": [],
13940
  "source": [
@@ -13946,7 +13946,7 @@
13946
  {
13947
  "cell_type": "code",
13948
  "execution_count": 54,
13949
- "id": "8906918e",
13950
  "metadata": {},
13951
  "outputs": [],
13952
  "source": [
@@ -14005,7 +14005,7 @@
14005
  {
14006
  "cell_type": "code",
14007
  "execution_count": 55,
14008
- "id": "40ff3940",
14009
  "metadata": {},
14010
  "outputs": [],
14011
  "source": [
@@ -14015,7 +14015,7 @@
14015
  {
14016
  "cell_type": "code",
14017
  "execution_count": 56,
14018
- "id": "2a579f5a",
14019
  "metadata": {},
14020
  "outputs": [],
14021
  "source": [
@@ -14026,7 +14026,7 @@
14026
  {
14027
  "cell_type": "code",
14028
  "execution_count": 57,
14029
- "id": "221d53c7",
14030
  "metadata": {},
14031
  "outputs": [],
14032
  "source": [
@@ -14050,7 +14050,7 @@
14050
  {
14051
  "cell_type": "code",
14052
  "execution_count": null,
14053
- "id": "87cf1d87",
14054
  "metadata": {},
14055
  "outputs": [],
14056
  "source": [
@@ -14060,7 +14060,7 @@
14060
  {
14061
  "cell_type": "code",
14062
  "execution_count": null,
14063
- "id": "d8afce23",
14064
  "metadata": {},
14065
  "outputs": [],
14066
  "source": []
@@ -14068,7 +14068,7 @@
14068
  {
14069
  "cell_type": "code",
14070
  "execution_count": 58,
14071
- "id": "22a21f3f",
14072
  "metadata": {},
14073
  "outputs": [
14074
  {
@@ -14102,7 +14102,7 @@
14102
  {
14103
  "cell_type": "code",
14104
  "execution_count": 59,
14105
- "id": "1688901e",
14106
  "metadata": {},
14107
  "outputs": [
14108
  {
@@ -14120,10 +14120,19 @@
14120
  },
14121
  {
14122
  "cell_type": "code",
14123
- "execution_count": 60,
14124
- "id": "e13830f8",
14125
  "metadata": {},
14126
- "outputs": [],
 
 
 
 
 
 
 
 
 
14127
  "source": [
14128
  "from transformers import TrainingArguments\n",
14129
  "\n",
@@ -14136,8 +14145,8 @@
14136
  " gradient_checkpointing=True,\n",
14137
  " fp16=True,\n",
14138
  " num_train_epochs=30,\n",
14139
- " save_steps=100,\n",
14140
- " eval_steps=100,\n",
14141
  " logging_steps=100,\n",
14142
  " learning_rate=3e-5,\n",
14143
  " warmup_steps=500,\n",
@@ -14148,8 +14157,8 @@
14148
  },
14149
  {
14150
  "cell_type": "code",
14151
- "execution_count": 63,
14152
- "id": "2a405bb3",
14153
  "metadata": {},
14154
  "outputs": [
14155
  {
@@ -14178,7 +14187,7 @@
14178
  {
14179
  "cell_type": "code",
14180
  "execution_count": null,
14181
- "id": "e9a99c77",
14182
  "metadata": {},
14183
  "outputs": [
14184
  {
@@ -14203,8 +14212,8 @@
14203
  "\n",
14204
  " <div>\n",
14205
  " \n",
14206
- " <progress value='101' max='4890' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
14207
- " [ 101/4890 01:40 < 1:21:11, 0.98 it/s, Epoch 0.61/30]\n",
14208
  " </div>\n",
14209
  " <table border=\"1\" class=\"dataframe\">\n",
14210
  " <thead>\n",
@@ -14217,8 +14226,8 @@
14217
  " </thead>\n",
14218
  " <tbody>\n",
14219
  " <tr>\n",
14220
- " <td>100</td>\n",
14221
- " <td>16.976400</td>\n",
14222
  " <td>13.300326</td>\n",
14223
  " <td>0.989265</td>\n",
14224
  " </tr>\n",
@@ -14240,8 +14249,11 @@
14240
  "***** Running Evaluation *****\n",
14241
  " Num examples = 291\n",
14242
  " Batch size = 8\n",
14243
- "Saving model checkpoint to ./checkpoint-100\n",
14244
- "Configuration saved in ./checkpoint-100/config.json\n"
 
 
 
14245
  ]
14246
  }
14247
  ],
@@ -14252,7 +14264,7 @@
14252
  {
14253
  "cell_type": "code",
14254
  "execution_count": null,
14255
- "id": "3629e75f",
14256
  "metadata": {},
14257
  "outputs": [],
14258
  "source": []
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": 1,
6
+ "id": "3ae82d97",
7
  "metadata": {
8
  "collapsed": true,
9
  "jupyter": {
 
708
  {
709
  "cell_type": "code",
710
  "execution_count": 2,
711
+ "id": "5082f3ca",
712
  "metadata": {},
713
  "outputs": [],
714
  "source": [
 
721
  {
722
  "cell_type": "code",
723
  "execution_count": null,
724
+ "id": "c5838f22",
725
  "metadata": {
726
  "collapsed": true,
727
  "jupyter": {
 
13484
  {
13485
  "cell_type": "code",
13486
  "execution_count": 3,
13487
+ "id": "94fea955",
13488
  "metadata": {},
13489
  "outputs": [],
13490
  "source": [
 
13505
  {
13506
  "cell_type": "code",
13507
  "execution_count": 32,
13508
+ "id": "0bdb4556",
13509
  "metadata": {},
13510
  "outputs": [
13511
  {
 
13527
  {
13528
  "cell_type": "code",
13529
  "execution_count": 33,
13530
+ "id": "d6645c93",
13531
  "metadata": {},
13532
  "outputs": [],
13533
  "source": [
 
13542
  {
13543
  "cell_type": "code",
13544
  "execution_count": 6,
13545
+ "id": "b64fe901",
13546
  "metadata": {},
13547
  "outputs": [],
13548
  "source": [
 
13558
  {
13559
  "cell_type": "code",
13560
  "execution_count": 7,
13561
+ "id": "65867ef2",
13562
  "metadata": {},
13563
  "outputs": [],
13564
  "source": [
 
13570
  {
13571
  "cell_type": "code",
13572
  "execution_count": 8,
13573
+ "id": "beca9fac",
13574
  "metadata": {},
13575
  "outputs": [
13576
  {
 
13592
  {
13593
  "cell_type": "code",
13594
  "execution_count": 9,
13595
+ "id": "d679de6c",
13596
  "metadata": {},
13597
  "outputs": [],
13598
  "source": [
 
13602
  {
13603
  "cell_type": "code",
13604
  "execution_count": 34,
13605
+ "id": "eb92b2d5",
13606
  "metadata": {},
13607
  "outputs": [
13608
  {
 
13650
  {
13651
  "cell_type": "code",
13652
  "execution_count": 35,
13653
+ "id": "7b8296e6",
13654
  "metadata": {},
13655
  "outputs": [],
13656
  "source": [
 
13661
  {
13662
  "cell_type": "code",
13663
  "execution_count": 36,
13664
+ "id": "78dc9b6c",
13665
  "metadata": {},
13666
  "outputs": [
13667
  {
 
13679
  {
13680
  "cell_type": "code",
13681
  "execution_count": 37,
13682
+ "id": "0f856602",
13683
  "metadata": {},
13684
  "outputs": [
13685
  {
 
13706
  {
13707
  "cell_type": "code",
13708
  "execution_count": 38,
13709
+ "id": "c5f41740",
13710
  "metadata": {},
13711
  "outputs": [],
13712
  "source": [
 
13718
  {
13719
  "cell_type": "code",
13720
  "execution_count": 39,
13721
+ "id": "b750d9cf",
13722
  "metadata": {},
13723
  "outputs": [],
13724
  "source": [
 
13728
  {
13729
  "cell_type": "code",
13730
  "execution_count": 40,
13731
+ "id": "e3f563c7",
13732
  "metadata": {},
13733
  "outputs": [],
13734
  "source": [
 
13740
  {
13741
  "cell_type": "code",
13742
  "execution_count": 41,
13743
+ "id": "8dd37836",
13744
  "metadata": {},
13745
  "outputs": [
13746
  {
 
13761
  {
13762
  "cell_type": "code",
13763
  "execution_count": 26,
13764
+ "id": "8868a601",
13765
  "metadata": {},
13766
  "outputs": [],
13767
  "source": [
 
13778
  {
13779
  "cell_type": "code",
13780
  "execution_count": 27,
13781
+ "id": "3503046d",
13782
  "metadata": {},
13783
  "outputs": [
13784
  {
 
13818
  {
13819
  "cell_type": "code",
13820
  "execution_count": 48,
13821
+ "id": "895f3bfb",
13822
  "metadata": {},
13823
  "outputs": [],
13824
  "source": [
 
13829
  {
13830
  "cell_type": "code",
13831
  "execution_count": 50,
13832
+ "id": "e5515c8d",
13833
  "metadata": {},
13834
  "outputs": [
13835
  {
 
13854
  {
13855
  "cell_type": "code",
13856
  "execution_count": 51,
13857
+ "id": "450c5c90",
13858
  "metadata": {},
13859
  "outputs": [
13860
  {
 
13901
  {
13902
  "cell_type": "code",
13903
  "execution_count": 52,
13904
+ "id": "b10b6aa1",
13905
  "metadata": {},
13906
  "outputs": [],
13907
  "source": [
 
13923
  {
13924
  "cell_type": "code",
13925
  "execution_count": 53,
13926
+ "id": "a1776bef",
13927
  "metadata": {},
13928
  "outputs": [],
13929
  "source": [
 
13934
  {
13935
  "cell_type": "code",
13936
  "execution_count": 41,
13937
+ "id": "8eea6563",
13938
  "metadata": {},
13939
  "outputs": [],
13940
  "source": [
 
13946
  {
13947
  "cell_type": "code",
13948
  "execution_count": 54,
13949
+ "id": "f15bec0a",
13950
  "metadata": {},
13951
  "outputs": [],
13952
  "source": [
 
14005
  {
14006
  "cell_type": "code",
14007
  "execution_count": 55,
14008
+ "id": "d30dafe4",
14009
  "metadata": {},
14010
  "outputs": [],
14011
  "source": [
 
14015
  {
14016
  "cell_type": "code",
14017
  "execution_count": 56,
14018
+ "id": "408bcb4d",
14019
  "metadata": {},
14020
  "outputs": [],
14021
  "source": [
 
14026
  {
14027
  "cell_type": "code",
14028
  "execution_count": 57,
14029
+ "id": "e5573fd8",
14030
  "metadata": {},
14031
  "outputs": [],
14032
  "source": [
 
14050
  {
14051
  "cell_type": "code",
14052
  "execution_count": null,
14053
+ "id": "da8c34ad",
14054
  "metadata": {},
14055
  "outputs": [],
14056
  "source": [
 
14060
  {
14061
  "cell_type": "code",
14062
  "execution_count": null,
14063
+ "id": "229b8ebf",
14064
  "metadata": {},
14065
  "outputs": [],
14066
  "source": []
 
14068
  {
14069
  "cell_type": "code",
14070
  "execution_count": 58,
14071
+ "id": "7b36eee5",
14072
  "metadata": {},
14073
  "outputs": [
14074
  {
 
14102
  {
14103
  "cell_type": "code",
14104
  "execution_count": 59,
14105
+ "id": "7ec46eca",
14106
  "metadata": {},
14107
  "outputs": [
14108
  {
 
14120
  },
14121
  {
14122
  "cell_type": "code",
14123
+ "execution_count": 65,
14124
+ "id": "9db5b382",
14125
  "metadata": {},
14126
+ "outputs": [
14127
+ {
14128
+ "name": "stderr",
14129
+ "output_type": "stream",
14130
+ "text": [
14131
+ "PyTorch: setting up devices\n",
14132
+ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
14133
+ ]
14134
+ }
14135
+ ],
14136
  "source": [
14137
  "from transformers import TrainingArguments\n",
14138
  "\n",
 
14145
  " gradient_checkpointing=True,\n",
14146
  " fp16=True,\n",
14147
  " num_train_epochs=30,\n",
14148
+ " save_steps=1,\n",
14149
+ " eval_steps=1,\n",
14150
  " logging_steps=100,\n",
14151
  " learning_rate=3e-5,\n",
14152
  " warmup_steps=500,\n",
 
14157
  },
14158
  {
14159
  "cell_type": "code",
14160
+ "execution_count": 66,
14161
+ "id": "96c9aeb9",
14162
  "metadata": {},
14163
  "outputs": [
14164
  {
 
14187
  {
14188
  "cell_type": "code",
14189
  "execution_count": null,
14190
+ "id": "a3a38848",
14191
  "metadata": {},
14192
  "outputs": [
14193
  {
 
14212
  "\n",
14213
  " <div>\n",
14214
  " \n",
14215
+ " <progress value='2' max='4890' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
14216
+ " [ 2/4890 : < :, Epoch 0.01/30]\n",
14217
  " </div>\n",
14218
  " <table border=\"1\" class=\"dataframe\">\n",
14219
  " <thead>\n",
 
14226
  " </thead>\n",
14227
  " <tbody>\n",
14228
  " <tr>\n",
14229
+ " <td>1</td>\n",
14230
+ " <td>No log</td>\n",
14231
  " <td>13.300326</td>\n",
14232
  " <td>0.989265</td>\n",
14233
  " </tr>\n",
 
14249
  "***** Running Evaluation *****\n",
14250
  " Num examples = 291\n",
14251
  " Batch size = 8\n",
14252
+ "Saving model checkpoint to ./checkpoint-1\n",
14253
+ "Configuration saved in ./checkpoint-1/config.json\n",
14254
+ "Model weights saved in ./checkpoint-1/pytorch_model.bin\n",
14255
+ "Configuration saved in ./checkpoint-1/preprocessor_config.json\n",
14256
+ "Configuration saved in ./preprocessor_config.json\n"
14257
  ]
14258
  }
14259
  ],
 
14264
  {
14265
  "cell_type": "code",
14266
  "execution_count": null,
14267
+ "id": "e27620ac",
14268
  "metadata": {},
14269
  "outputs": [],
14270
  "source": []
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f53351c36f9166d72fba05a26550d456156793a4caab264b38fc1e6437cf34a8
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b35c85cfceecf16b9f8a4306c6c419ed33469f21c8e3b016a95fdbbcd87ddbb
3
  size 2991