Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
config_saved.json +1 -0
supervised.pol.mdl +3 -0
train_INFO.log +351 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+supervised.pol.mdl filter=lfs diff=lfs merge=lfs -text

config_saved.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"args": {"seed": 1, "eval_freq": 2, "dataset_name": "multiwoz21", "model_path": "NO/seed1/save/supervised.pol.mdl"}, "config": {"batchsz": 64, "epoch": 40, "gamma": 0.99, "policy_lr": 5e-06, "supervised_lr": 1e-05, "entropy_weight": 0.01, "value_lr": 0.0001, "save_dir": "save", "log_dir": "log", "save_per_epoch": 5000, "hidden_size": 256, "load": "save/best", "logging_mode": "INFO", "use_cer": true, "memory_size": 5000, "behaviour_cloning_weight": 0.1, "supervised_weight": 0.0, "online_offline_ratio": 0.2, "smoothed_value_function": false, "use_reservoir_sampling": false, "seed": 0, "lambda": 1, "tau": 0.001, "policy_freq": 1, "print_per_batch": 400, "c": 1.0, "rho_bar": 1, "max_length": 10, "noisy_linear": false, "dataset_name": "multiwoz21", "data_percentage": 1.0, "dialogue_order": 0, "multiwoz_like": false, "regularization_weight": 0.0, "enc_input_dim": 128, "enc_nhead": 2, "enc_d_hid": 128, "enc_nlayers": 4, "enc_dropout": 0.1, "dec_input_dim": 128, "dec_nhead": 2, "dec_d_hid": 128, "dec_nlayers": 2, "dec_dropout": 0.0, "action_embedding_dim": 128, "domain_embedding_dim": 64, "value_embedding_dim": 12, "node_embedding_dim": 128, "roberta_path": "", "node_attention": true, "semantic_descriptions": true, "freeze_roberta": true, "use_pooled": false, "mean": true, "roberta_actions": true, "independent_descriptions": true, "random_matrix": false, "distance_metric": false, "verbose": false, "ignore_features": [], "domains_removed": ["hospital", "police", "train", "hotel", "attraction", "taxi"], "only_active_values": false, "permuted_data": false, "need_weights": false, "cls_dim": 128, "independent": true, "old_critic": false, "pos_weight": 5, "weight_decay": 1e-05}, "policy_config": null}

supervised.pol.mdl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:847929fde204d26f279c7002ad7b8eb1108df943c24e207cd5bf01d2892f55ed
+size 9331458

train_INFO.log ADDED Viewed

	@@ -0,0 +1,351 @@

+Visible device: cuda
+Seed used: 1
+Batch size: 64
+Epochs: 40
+Learning rate: 1e-05
+Entropy weight: 0.01
+Regularization weight: 0.0
+Only use multiwoz like domains: False
+We use: 100.0% of the data
+Dialogue order used: 0
+Vectorizer: Data set used is multiwoz21
+We filter state by active domains: True
+Vectorizer: Data set used is multiwoz21
+Embedding semantic descriptions: True
+Embedded descriptions successfully. Size: torch.Size([338, 768])
+Data set used for descriptions: multiwoz21
+We use Roberta to embed actions.
+Didnt load a model
+Start training
+Epoch: 0
+Average actions: 1.957058072090149
+Average target actions: 2.669339895248413
+Precision: 0.13822525597269625
+Recall: 0.10146667362597213
+F1: 0.11702736056346508
+<<dialog policy>> epoch 0: saved network to mdl
+Best Precision: 0.13822525597269625
+Best Recall: 0.10146667362597213
+Best F1: 0.11702736056346508
+Epoch: 1
+Precision: 0.13822525597269625
+Recall: 0.10146667362597213
+F1: 0.11702736056346508
+Best Precision: 0.13822525597269625
+Best Recall: 0.10146667362597213
+Best F1: 0.11702736056346508
+Epoch: 2
+Average actions: 2.0794308185577393
+Average target actions: 2.6675729751586914
+Precision: 0.22303363258743134
+Recall: 0.1737564591053813
+F1: 0.19533519143318176
+<<dialog policy>> epoch 2: saved network to mdl
+Best Precision: 0.22303363258743134
+Best Recall: 0.1737564591053813
+Best F1: 0.19533519143318176
+Epoch: 3
+Precision: 0.22303363258743134
+Recall: 0.1737564591053813
+F1: 0.19533519143318176
+Best Precision: 0.22303363258743134
+Best Recall: 0.1737564591053813
+Best F1: 0.19533519143318176
+Epoch: 4
+Average actions: 2.0110926628112793
+Average target actions: 2.665806293487549
+Precision: 0.26409084614319345
+Recall: 0.19907093272091445
+F1: 0.22701705306389688
+<<dialog policy>> epoch 4: saved network to mdl
+Best Precision: 0.26409084614319345
+Best Recall: 0.19907093272091445
+Best F1: 0.22701705306389688
+Epoch: 5
+Precision: 0.26409084614319345
+Recall: 0.19907093272091445
+F1: 0.22701705306389688
+Best Precision: 0.26409084614319345
+Best Recall: 0.19907093272091445
+Best F1: 0.22701705306389688
+Epoch: 6
+Average actions: 1.9673057794570923
+Average target actions: 2.667219877243042
+Precision: 0.2910210146465719
+Recall: 0.21467717521791324
+F1: 0.2470863871200288
+<<dialog policy>> epoch 6: saved network to mdl
+Best Precision: 0.2910210146465719
+Best Recall: 0.21467717521791324
+Best F1: 0.2470863871200288
+Epoch: 7
+Precision: 0.2910210146465719
+Recall: 0.21467717521791324
+F1: 0.2470863871200288
+Best Precision: 0.2910210146465719
+Best Recall: 0.21467717521791324
+Best F1: 0.2470863871200288
+Epoch: 8
+Average actions: 1.8258512020111084
+Average target actions: 2.667926549911499
+Precision: 0.30450038138825325
+Recall: 0.20836160551176994
+F1: 0.24742012457776819
+<<dialog policy>> epoch 8: saved network to mdl
+Best Precision: 0.30450038138825325
+Best Recall: 0.21467717521791324
+Best F1: 0.24742012457776819
+Epoch: 9
+Precision: 0.30450038138825325
+Recall: 0.20836160551176994
+F1: 0.24742012457776819
+Best Precision: 0.30450038138825325
+Best Recall: 0.21467717521791324
+Best F1: 0.24742012457776819
+Epoch: 10
+Average actions: 1.7796674966812134
+Average target actions: 2.66333270072937
+Precision: 0.3297132588483475
+Recall: 0.2202620178506185
+F1: 0.2640966268227048
+<<dialog policy>> epoch 10: saved network to mdl
+Best Precision: 0.3297132588483475
+Best Recall: 0.2202620178506185
+Best F1: 0.2640966268227048
+Epoch: 11
+Precision: 0.3297132588483475
+Recall: 0.2202620178506185
+F1: 0.2640966268227048
+Best Precision: 0.3297132588483475
+Best Recall: 0.2202620178506185
+Best F1: 0.2640966268227048
+Epoch: 12
+Average actions: 1.8398014307022095
+Average target actions: 2.67004656791687
+Precision: 0.34064769975786924
+Recall: 0.23498094890129964
+F1: 0.27811583011583013
+<<dialog policy>> epoch 12: saved network to mdl
+Best Precision: 0.34064769975786924
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 13
+Precision: 0.34064769975786924
+Recall: 0.23498094890129964
+F1: 0.27811583011583013
+Best Precision: 0.34064769975786924
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 14
+Average actions: 1.7070426940917969
+Average target actions: 2.667219877243042
+Precision: 0.35462034091835903
+Recall: 0.22694295109348087
+F1: 0.2767663908338638
+Best Precision: 0.35462034091835903
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 15
+Precision: 0.35462034091835903
+Recall: 0.22694295109348087
+F1: 0.2767663908338638
+Best Precision: 0.35462034091835903
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 16
+Average actions: 1.6812468767166138
+Average target actions: 2.6643927097320557
+Precision: 0.34859650575474044
+Recall: 0.21974006994101988
+F1: 0.2695607632219234
+Best Precision: 0.35462034091835903
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 17
+Precision: 0.34859650575474044
+Recall: 0.21974006994101988
+F1: 0.2695607632219234
+Best Precision: 0.35462034091835903
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 18
+Average actions: 1.675270438194275
+Average target actions: 2.6640396118164062
+Precision: 0.35976419794088343
+Recall: 0.22616002922908293
+F1: 0.27772970547703746
+Best Precision: 0.35976419794088343
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 19
+Precision: 0.35976419794088343
+Recall: 0.22616002922908293
+F1: 0.27772970547703746
+Best Precision: 0.35976419794088343
+Best Recall: 0.23498094890129964
+Best F1: 0.27811583011583013
+Epoch: 20
+Average actions: 1.5666790008544922
+Average target actions: 2.6647462844848633
+Precision: 0.3769442716203004
+Recall: 0.2213581084607756
+F1: 0.27892140743176586
+<<dialog policy>> epoch 20: saved network to mdl
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.27892140743176586
+Epoch: 21
+Precision: 0.3769442716203004
+Recall: 0.2213581084607756
+F1: 0.27892140743176586
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.27892140743176586
+Epoch: 22
+Average actions: 1.6693706512451172
+Average target actions: 2.6661596298217773
+Precision: 0.3716379382130069
+Recall: 0.23294535205386502
+F1: 0.2863834702258727
+<<dialog policy>> epoch 22: saved network to mdl
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.2863834702258727
+Epoch: 23
+Precision: 0.3716379382130069
+Recall: 0.23294535205386502
+F1: 0.2863834702258727
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.2863834702258727
+Epoch: 24
+Average actions: 1.6701388359069824
+Average target actions: 2.6643927097320557
+Precision: 0.3714618714618715
+Recall: 0.23289315726290516
+F1: 0.2862917455327067
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.2863834702258727
+Epoch: 25
+Precision: 0.3714618714618715
+Recall: 0.23289315726290516
+F1: 0.2862917455327067
+Best Precision: 0.3769442716203004
+Best Recall: 0.23498094890129964
+Best F1: 0.2863834702258727
+Epoch: 26
+Average actions: 1.6909722089767456
+Average target actions: 2.665099620819092
+Precision: 0.3781160016454134
+Recall: 0.2398872592515267
+F1: 0.2935428242958421
+<<dialog policy>> epoch 26: saved network to mdl
+Best Precision: 0.3781160016454134
+Best Recall: 0.2398872592515267
+Best F1: 0.2935428242958421
+Epoch: 27
+Precision: 0.3781160016454134
+Recall: 0.2398872592515267
+F1: 0.2935428242958421
+Best Precision: 0.3781160016454134
+Best Recall: 0.2398872592515267
+Best F1: 0.2935428242958421
+Epoch: 28
+Average actions: 1.8047566413879395
+Average target actions: 2.6643927097320557
+Precision: 0.3654779326811985
+Recall: 0.24766428310454616
+F1: 0.29525231783958683
+<<dialog policy>> epoch 28: saved network to mdl
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 29
+Precision: 0.3654779326811985
+Recall: 0.24766428310454616
+F1: 0.29525231783958683
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 30
+Average actions: 1.680601716041565
+Average target actions: 2.6640396118164062
+Precision: 0.37665562913907286
+Recall: 0.23748629886737305
+F1: 0.2913025384935497
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 31
+Precision: 0.37665562913907286
+Recall: 0.23748629886737305
+F1: 0.2913025384935497
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 32
+Average actions: 1.7778853178024292
+Average target actions: 2.667219877243042
+Precision: 0.3660120491354354
+Recall: 0.2441672321102354
+F1: 0.2929242329367564
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 33
+Precision: 0.3660120491354354
+Recall: 0.2441672321102354
+F1: 0.2929242329367564
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 34
+Average actions: 1.726846694946289
+Average target actions: 2.66333270072937
+Precision: 0.3723121526938874
+Recall: 0.24129651860744297
+F1: 0.29281732961743095
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 35
+Precision: 0.3723121526938874
+Recall: 0.24129651860744297
+F1: 0.29281732961743095
+Best Precision: 0.3781160016454134
+Best Recall: 0.24766428310454616
+Best F1: 0.29525231783958683
+Epoch: 36
+Average actions: 1.8067078590393066
+Average target actions: 2.6675729751586914
+Precision: 0.37099753694581283
+Recall: 0.2515788924265358
+F1: 0.29983515287238344
+<<dialog policy>> epoch 36: saved network to mdl
+Best Precision: 0.3781160016454134
+Best Recall: 0.2515788924265358
+Best F1: 0.29983515287238344
+Epoch: 37
+Precision: 0.37099753694581283
+Recall: 0.2515788924265358
+F1: 0.29983515287238344
+Best Precision: 0.3781160016454134
+Best Recall: 0.2515788924265358
+Best F1: 0.29983515287238344
+Epoch: 38
+Average actions: 1.7964909076690674
+Average target actions: 2.6647462844848633
+Precision: 0.36536823356307596
+Recall: 0.2462550237486299
+F1: 0.2942130207034173
+Best Precision: 0.3781160016454134
+Best Recall: 0.2515788924265358
+Best F1: 0.29983515287238344
+Epoch: 39
+Precision: 0.36536823356307596
+Recall: 0.2462550237486299
+F1: 0.2942130207034173
+Best Precision: 0.3781160016454134
+Best Recall: 0.2515788924265358
+Best F1: 0.29983515287238344