['/home/yogeshchandrasekharuni/Developer/food-ordering/PURE/run_entity.py', '--do_train', '--do_eval', '--learning_rate=1e-5', '--task_learning_rate=5e-4', '--train_batch_size=16', '--context_window=0', '--task=food', '--data_dir=./custom_data', '--model=bert-base-uncased', '--output_dir=./custom_data_out'] Namespace(bert_model_dir=None, bertadam=False, context_window=0, data_dir='./custom_data', dev_data='./custom_data/dev.json', dev_pred_filename='ent_pred_dev.json', do_eval=True, do_train=True, eval_batch_size=32, eval_per_epoch=1, eval_test=False, learning_rate=1e-05, max_span_length=8, model='bert-base-uncased', num_epoch=100, output_dir='./custom_data_out', print_loss_step=100, seed=0, task='food', task_learning_rate=0.0005, test_data='./custom_data/test.json', test_pred_filename='ent_pred_test.json', train_batch_size=16, train_data='./custom_data/train.json', train_shuffle=False, use_albert=False, warmup_proportion=0.1) Moving to CUDA... # GPUs = 1 # Overlap: 0 Extracted 11284 samples from 11284 documents, with 44018 NER labels, 17.283 avg input length, 52 max length Max Length: 52, max NER: 12 # Overlap: 0 Extracted 44972 samples from 44972 documents, with 173425 NER labels, 17.131 avg input length, 55 max length Max Length: 55, max NER: 12 Epoch=0, iter=99, loss=243.44804 Epoch=0, iter=199, loss=235.79827 Epoch=0, iter=299, loss=233.29755 Epoch=0, iter=399, loss=202.52836 Epoch=0, iter=499, loss=118.44091 Epoch=0, iter=599, loss=41.34291 Epoch=0, iter=699, loss=25.83691 Epoch=0, iter=799, loss=24.32704 Epoch=0, iter=899, loss=23.66524 Epoch=0, iter=999, loss=23.12145 Epoch=0, iter=1099, loss=22.90508 Epoch=0, iter=1199, loss=22.97754 Epoch=0, iter=1299, loss=21.99249 Epoch=0, iter=1399, loss=22.37460 Epoch=0, iter=1499, loss=22.32461 Epoch=0, iter=1599, loss=21.33955 Epoch=0, iter=1699, loss=20.31395 Epoch=0, iter=1799, loss=19.90072 Epoch=0, iter=1899, loss=19.10657 Epoch=0, iter=1999, loss=17.86468 Epoch=0, iter=2099, loss=17.70574 Epoch=0, iter=2199, loss=15.95796 Epoch=0, iter=2299, loss=15.67962 Epoch=0, iter=2399, loss=14.26441 Epoch=0, iter=2499, loss=13.86101 Epoch=0, iter=2599, loss=12.75729 Epoch=0, iter=2699, loss=12.65991 Epoch=0, iter=2799, loss=12.01459 Evaluating... Accuracy: 0.973822 Cor: 14929, Pred TOT: 21103, Gold TOT: 44018 P: 0.70743, R: 0.33916, F1: 0.45850 Used time: 97.782789 !!! Best valid (epoch=0): 45.85 Saving model to ./custom_data_out... Epoch=1, iter=88, loss=11.36566 Epoch=1, iter=188, loss=10.61195 Epoch=1, iter=288, loss=10.59556 Epoch=1, iter=388, loss=9.86007 Epoch=1, iter=488, loss=9.40349 Epoch=1, iter=588, loss=9.20889 Epoch=1, iter=688, loss=8.78756 Epoch=1, iter=788, loss=8.56425 Epoch=1, iter=888, loss=8.51660 Epoch=1, iter=988, loss=7.91102 Epoch=1, iter=1088, loss=7.51398 Epoch=1, iter=1188, loss=7.35832 Epoch=1, iter=1288, loss=6.69688 Epoch=1, iter=1388, loss=6.77910 Epoch=1, iter=1488, loss=6.58863 Epoch=1, iter=1588, loss=6.22025 Epoch=1, iter=1688, loss=5.78497 Epoch=1, iter=1788, loss=5.83271 Epoch=1, iter=1888, loss=5.79998 Epoch=1, iter=1988, loss=5.42660 Epoch=1, iter=2088, loss=5.18899 Epoch=1, iter=2188, loss=4.97301 Epoch=1, iter=2288, loss=5.00884 Epoch=1, iter=2388, loss=4.51213 Epoch=1, iter=2488, loss=4.79700 Epoch=1, iter=2588, loss=4.24864 Epoch=1, iter=2688, loss=4.32532 Epoch=1, iter=2788, loss=4.28969 Evaluating... Accuracy: 0.990703 Cor: 34145, Pred TOT: 36355, Gold TOT: 44018 P: 0.93921, R: 0.77571, F1: 0.84966 Used time: 101.091194 !!! Best valid (epoch=1): 84.97 Saving model to ./custom_data_out... Epoch=2, iter=77, loss=4.11897 Epoch=2, iter=177, loss=3.93580 Epoch=2, iter=277, loss=4.09057 Epoch=2, iter=377, loss=4.04855 Epoch=2, iter=477, loss=3.67340 Epoch=2, iter=577, loss=3.70883 Epoch=2, iter=677, loss=3.42234 Epoch=2, iter=777, loss=3.61931 Epoch=2, iter=877, loss=3.53848 Epoch=2, iter=977, loss=3.27525 Epoch=2, iter=1077, loss=3.22880 Epoch=2, iter=1177, loss=3.33254 Epoch=2, iter=1277, loss=3.03457 Epoch=2, iter=1377, loss=3.18838 Epoch=2, iter=1477, loss=3.05527 Epoch=2, iter=1577, loss=3.08948 Epoch=2, iter=1677, loss=2.82804 Epoch=2, iter=1777, loss=3.00084 Epoch=2, iter=1877, loss=2.98393 Epoch=2, iter=1977, loss=2.79067 Epoch=2, iter=2077, loss=2.85030 Epoch=2, iter=2177, loss=2.71113 Epoch=2, iter=2277, loss=2.65620 Epoch=2, iter=2377, loss=2.55023 Epoch=2, iter=2477, loss=2.66498 Epoch=2, iter=2577, loss=2.45346 Epoch=2, iter=2677, loss=2.45250 Epoch=2, iter=2777, loss=2.49077 Evaluating... Accuracy: 0.993408 Cor: 36716, Pred TOT: 37983, Gold TOT: 44018 P: 0.96664, R: 0.83411, F1: 0.89550 Used time: 99.946823 !!! Best valid (epoch=2): 89.55 Saving model to ./custom_data_out... Epoch=3, iter=66, loss=2.44420 Epoch=3, iter=166, loss=2.39805 Epoch=3, iter=266, loss=2.45792 Epoch=3, iter=366, loss=2.53090 Epoch=3, iter=466, loss=2.25996 Epoch=3, iter=566, loss=2.26043 Epoch=3, iter=666, loss=2.17943 Epoch=3, iter=766, loss=2.19351 Epoch=3, iter=866, loss=2.40202 Epoch=3, iter=966, loss=2.09336 Epoch=3, iter=1066, loss=2.08938 Epoch=3, iter=1166, loss=2.16629 Epoch=3, iter=1266, loss=1.93024 Epoch=3, iter=1366, loss=2.07213 Epoch=3, iter=1466, loss=1.93386 Epoch=3, iter=1566, loss=1.99428 Epoch=3, iter=1666, loss=1.87277 Epoch=3, iter=1766, loss=1.92387 Epoch=3, iter=1866, loss=1.82625 Epoch=3, iter=1966, loss=1.82094 Epoch=3, iter=2066, loss=1.74876 Epoch=3, iter=2166, loss=1.67695 Epoch=3, iter=2266, loss=1.61129 Epoch=3, iter=2366, loss=1.68518 Epoch=3, iter=2466, loss=1.56917 Epoch=3, iter=2566, loss=1.49777 Epoch=3, iter=2666, loss=1.50936 Epoch=3, iter=2766, loss=1.59259 Evaluating... Accuracy: 0.996364 Cor: 40160, Pred TOT: 41149, Gold TOT: 44018 P: 0.97597, R: 0.91235, F1: 0.94309 Used time: 99.399823 !!! Best valid (epoch=3): 94.31 Saving model to ./custom_data_out... Epoch=4, iter=55, loss=1.51106 Epoch=4, iter=155, loss=1.43524 Epoch=4, iter=255, loss=1.50080 Epoch=4, iter=355, loss=1.53660 Epoch=4, iter=455, loss=1.40172 Epoch=4, iter=555, loss=1.34934 Epoch=4, iter=655, loss=1.31810 Epoch=4, iter=755, loss=1.38727 Epoch=4, iter=855, loss=1.45979 Epoch=4, iter=955, loss=1.18226 Epoch=4, iter=1055, loss=1.31470 Epoch=4, iter=1155, loss=1.33524 Epoch=4, iter=1255, loss=1.19242 Epoch=4, iter=1355, loss=1.22670 Epoch=4, iter=1455, loss=1.11627 Epoch=4, iter=1555, loss=1.17806 Epoch=4, iter=1655, loss=1.05301 Epoch=4, iter=1755, loss=1.20741 Epoch=4, iter=1855, loss=1.14403 Epoch=4, iter=1955, loss=1.10366 Epoch=4, iter=2055, loss=0.99180 Epoch=4, iter=2155, loss=1.07554 Epoch=4, iter=2255, loss=0.96035 Epoch=4, iter=2355, loss=1.06883 Epoch=4, iter=2455, loss=0.98427 Epoch=4, iter=2555, loss=1.02981 Epoch=4, iter=2655, loss=0.98175 Epoch=4, iter=2755, loss=0.96598 Evaluating... Accuracy: 0.997638 Cor: 41425, Pred TOT: 42095, Gold TOT: 44018 P: 0.98408, R: 0.94109, F1: 0.96211 Used time: 98.526833 !!! Best valid (epoch=4): 96.21 Saving model to ./custom_data_out... Epoch=5, iter=44, loss=1.04638 Epoch=5, iter=144, loss=0.93053 Epoch=5, iter=244, loss=0.93799 Epoch=5, iter=344, loss=0.98231 Epoch=5, iter=444, loss=0.95388 Epoch=5, iter=544, loss=0.92944 Epoch=5, iter=644, loss=0.87617 Epoch=5, iter=744, loss=0.90396 Epoch=5, iter=844, loss=0.90082 Epoch=5, iter=944, loss=0.81391 Epoch=5, iter=1044, loss=0.85680 Epoch=5, iter=1144, loss=0.90453 Epoch=5, iter=1244, loss=0.82384