Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
config_saved.json +1 -0
supervised.pol.mdl +3 -0
train_INFO.log +341 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+supervised.pol.mdl filter=lfs diff=lfs merge=lfs -text

config_saved.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"args": {"seed": 0, "eval_freq": 2, "dataset_name": "multiwoz21", "model_path": "onvlab/policy/vtrace_DPT/supervised/experiments/sgd/save/supervised.pol.mdl"}, "config": {"batchsz": 64, "epoch": 40, "gamma": 0.99, "policy_lr": 5e-05, "supervised_lr": 1e-05, "entropy_weight": 0.01, "value_lr": 0.0001, "save_dir": "save", "log_dir": "log", "save_per_epoch": 5000, "hidden_size": 256, "load": "save/best", "logging_mode": "INFO", "use_cer": true, "memory_size": 5000, "behaviour_cloning_weight": 0.1, "supervised_weight": 0.0, "online_offline_ratio": 0.2, "smoothed_value_function": false, "use_reservoir_sampling": false, "seed": 0, "lambda": 1, "tau": 0.001, "policy_freq": 2, "print_per_batch": 400, "c": 1.0, "rho_bar": 1, "max_length": 10, "noisy_linear": false, "dataset_name": "multiwoz21", "data_percentage": 0.01, "multiwoz_like": false, "regularization_weight": 0.0, "enc_input_dim": 128, "enc_nhead": 2, "enc_d_hid": 128, "enc_nlayers": 4, "enc_dropout": 0.1, "dec_input_dim": 128, "dec_nhead": 2, "dec_d_hid": 128, "dec_nlayers": 2, "dec_dropout": 0.0, "action_embedding_dim": 128, "domain_embedding_dim": 64, "value_embedding_dim": 12, "node_embedding_dim": 128, "roberta_path": "", "node_attention": true, "semantic_descriptions": true, "freeze_roberta": true, "use_pooled": false, "mean": true, "roberta_actions": true, "independent_descriptions": true, "random_matrix": false, "distance_metric": false, "verbose": false, "ignore_features": [], "domains_removed": ["hospital", "police", "train", "hotel", "attraction", "taxi"], "only_active_values": false, "permuted_data": false, "need_weights": false, "cls_dim": 128, "independent": true, "old_critic": false, "pos_weight": 5, "weight_decay": 1e-05}, "policy_config": null}

supervised.pol.mdl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6992b768e91941f87a8f8275c88e5e3accf998bf4a1f7fbb5eb0bb337bd7fa6f
+size 9331458

train_INFO.log ADDED Viewed

	@@ -0,0 +1,341 @@

+Visible device: cuda
+Seed used: 0
+Batch size: 64
+Epochs: 40
+Learning rate: 1e-05
+Entropy weight: 0.01
+Regularization weight: 0.0
+Only use multiwoz like domains: False
+Vectorizer: Data set used is multiwoz21
+We filter state by active domains: True
+Vectorizer: Data set used is multiwoz21
+Embedding semantic descriptions: True
+Embedded descriptions successfully. Size: torch.Size([338, 768])
+Data set used for descriptions: multiwoz21
+We use Roberta to embed actions.
+Didnt load a model
+Start training
+Epoch: 0
+Precision: 0
+Recall: 0
+F1: 0
+Best Precision: 0.0
+Best Recall: 0.0
+Best F1: 0.0
+Epoch: 1
+Precision: 0
+Recall: 0
+F1: 0
+Best Precision: 0.0
+Best Recall: 0.0
+Best F1: 0.0
+Epoch: 2
+Average actions: 2.4348959922790527
+Average target actions: 2.28125
+Precision: 0.043010752688172046
+Recall: 0.0425531914893617
+F1: 0.04278074866310161
+<<dialog policy>> epoch 2: saved network to mdl
+Best Precision: 0.043010752688172046
+Best Recall: 0.0425531914893617
+Best F1: 0.04278074866310161
+Epoch: 3
+Precision: 0.043010752688172046
+Recall: 0.0425531914893617
+F1: 0.04278074866310161
+Best Precision: 0.043010752688172046
+Best Recall: 0.0425531914893617
+Best F1: 0.04278074866310161
+Epoch: 4
+Average actions: 2.4114584922790527
+Average target actions: 2.7890625
+Precision: 0.07058823529411765
+Recall: 0.06382978723404255
+F1: 0.06703910614525138
+<<dialog policy>> epoch 4: saved network to mdl
+Best Precision: 0.07058823529411765
+Best Recall: 0.06382978723404255
+Best F1: 0.06703910614525138
+Epoch: 5
+Precision: 0.07058823529411765
+Recall: 0.06382978723404255
+F1: 0.06703910614525138
+Best Precision: 0.07058823529411765
+Best Recall: 0.06382978723404255
+Best F1: 0.06703910614525138
+Epoch: 6
+Average actions: 2.1536459922790527
+Average target actions: 2.5859375
+Precision: 0.049079754601226995
+Recall: 0.0425531914893617
+F1: 0.045584045584045586
+Best Precision: 0.07058823529411765
+Best Recall: 0.06382978723404255
+Best F1: 0.06703910614525138
+Epoch: 7
+Precision: 0.049079754601226995
+Recall: 0.0425531914893617
+F1: 0.045584045584045586
+Best Precision: 0.07058823529411765
+Best Recall: 0.06382978723404255
+Best F1: 0.06703910614525138
+Epoch: 8
+Average actions: 2.15625
+Average target actions: 2.5520834922790527
+Precision: 0.07547169811320754
+Recall: 0.06382978723404255
+F1: 0.06916426512968299
+<<dialog policy>> epoch 8: saved network to mdl
+Best Precision: 0.07547169811320754
+Best Recall: 0.06382978723404255
+Best F1: 0.06916426512968299
+Epoch: 9
+Precision: 0.07547169811320754
+Recall: 0.06382978723404255
+F1: 0.06916426512968299
+Best Precision: 0.07547169811320754
+Best Recall: 0.06382978723404255
+Best F1: 0.06916426512968299
+Epoch: 10
+Average actions: 2.0572915077209473
+Average target actions: 2.3489584922790527
+Precision: 0.04516129032258064
+Recall: 0.03723404255319149
+F1: 0.04081632653061224
+Best Precision: 0.07547169811320754
+Best Recall: 0.06382978723404255
+Best F1: 0.06916426512968299
+Epoch: 11
+Precision: 0.04516129032258064
+Recall: 0.03723404255319149
+F1: 0.04081632653061224
+Best Precision: 0.07547169811320754
+Best Recall: 0.06382978723404255
+Best F1: 0.06916426512968299
+Epoch: 12
+Average actions: 1.984375
+Average target actions: 2.5520834922790527
+Precision: 0.08666666666666667
+Recall: 0.06914893617021277
+F1: 0.07692307692307691
+<<dialog policy>> epoch 12: saved network to mdl
+Best Precision: 0.08666666666666667
+Best Recall: 0.06914893617021277
+Best F1: 0.07692307692307691
+Epoch: 13
+Precision: 0.08666666666666667
+Recall: 0.06914893617021277
+F1: 0.07692307692307691
+Best Precision: 0.08666666666666667
+Best Recall: 0.06914893617021277
+Best F1: 0.07692307692307691
+Epoch: 14
+Average actions: 2.0416665077209473
+Average target actions: 2.3828125
+Precision: 0.05228758169934641
+Recall: 0.0425531914893617
+F1: 0.046920821114369494
+Best Precision: 0.08666666666666667
+Best Recall: 0.06914893617021277
+Best F1: 0.07692307692307691
+Epoch: 15
+Precision: 0.05228758169934641
+Recall: 0.0425531914893617
+F1: 0.046920821114369494
+Best Precision: 0.08666666666666667
+Best Recall: 0.06914893617021277
+Best F1: 0.07692307692307691
+Epoch: 16
+Average actions: 2.1666665077209473
+Average target actions: 2.2135417461395264
+Precision: 0.1346153846153846
+Recall: 0.11170212765957446
+F1: 0.12209302325581395
+<<dialog policy>> epoch 16: saved network to mdl
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 17
+Precision: 0.1346153846153846
+Recall: 0.11170212765957446
+F1: 0.12209302325581395
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 18
+Average actions: 1.7734375
+Average target actions: 2.5520834922790527
+Precision: 0.0661764705882353
+Recall: 0.047872340425531915
+F1: 0.05555555555555556
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 19
+Precision: 0.0661764705882353
+Recall: 0.047872340425531915
+F1: 0.05555555555555556
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 20
+Average actions: 2.1328125
+Average target actions: 2.6197917461395264
+Precision: 0.1346153846153846
+Recall: 0.11170212765957446
+F1: 0.12209302325581395
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 21
+Precision: 0.1346153846153846
+Recall: 0.11170212765957446
+F1: 0.12209302325581395
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 22
+Average actions: 1.9296875
+Average target actions: 2.1119792461395264
+Precision: 0.08391608391608392
+Recall: 0.06382978723404255
+F1: 0.07250755287009063
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 23
+Precision: 0.08391608391608392
+Recall: 0.06382978723404255
+F1: 0.07250755287009063
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 24
+Average actions: 2.2213540077209473
+Average target actions: 2.3151042461395264
+Precision: 0.09815950920245399
+Recall: 0.0851063829787234
+F1: 0.09116809116809117
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 25
+Precision: 0.09815950920245399
+Recall: 0.0851063829787234
+F1: 0.09116809116809117
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 26
+Average actions: 2.1171875
+Average target actions: 2.7890625
+Precision: 0.12987012987012986
+Recall: 0.10638297872340426
+F1: 0.11695906432748537
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 27
+Precision: 0.12987012987012986
+Recall: 0.10638297872340426
+F1: 0.11695906432748537
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 28
+Average actions: 1.7734375
+Average target actions: 2.484375
+Precision: 0.08823529411764706
+Recall: 0.06382978723404255
+F1: 0.07407407407407407
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 29
+Precision: 0.08823529411764706
+Recall: 0.06382978723404255
+F1: 0.07407407407407407
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 30
+Average actions: 2.1822915077209473
+Average target actions: 2.3489584922790527
+Precision: 0.10126582278481013
+Recall: 0.0851063829787234
+F1: 0.09248554913294797
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 31
+Precision: 0.10126582278481013
+Recall: 0.0851063829787234
+F1: 0.09248554913294797
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 32
+Average actions: 2.0442707538604736
+Average target actions: 2.6197917461395264
+Precision: 0.12345679012345678
+Recall: 0.10638297872340426
+F1: 0.11428571428571428
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 33
+Precision: 0.12345679012345678
+Recall: 0.10638297872340426
+F1: 0.11428571428571428
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 34
+Average actions: 1.8307292461395264
+Average target actions: 2.5859375
+Precision: 0.11510791366906475
+Recall: 0.0851063829787234
+F1: 0.09785932721712538
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 35
+Precision: 0.11510791366906475
+Recall: 0.0851063829787234
+F1: 0.09785932721712538
+Best Precision: 0.1346153846153846
+Best Recall: 0.11170212765957446
+Best F1: 0.12209302325581395
+Epoch: 36
+Average actions: 2.2838540077209473
+Average target actions: 2.3489584922790527
+Precision: 0.1286549707602339
+Recall: 0.11702127659574468
+F1: 0.12256267409470752
+<<dialog policy>> epoch 36: saved network to mdl
+Best Precision: 0.1346153846153846
+Best Recall: 0.11702127659574468
+Best F1: 0.12256267409470752
+Epoch: 37
+Precision: 0.1286549707602339
+Recall: 0.11702127659574468
+F1: 0.12256267409470752
+Best Precision: 0.1346153846153846
+Best Recall: 0.11702127659574468
+Best F1: 0.12256267409470752
+Epoch: 38
+Average actions: 1.9479167461395264
+Average target actions: 2.7552084922790527
+Precision: 0.12337662337662338
+Recall: 0.10106382978723404
+F1: 0.1111111111111111
+Best Precision: 0.1346153846153846
+Best Recall: 0.11702127659574468
+Best F1: 0.12256267409470752
+Epoch: 39
+Precision: 0.12337662337662338
+Recall: 0.10106382978723404
+F1: 0.1111111111111111
+Best Precision: 0.1346153846153846
+Best Recall: 0.11702127659574468
+Best F1: 0.12256267409470752