marcoyang committed
Commit a40d894
1 Parent(s): bb4e428

initial commit

exp/log-ppl/-2022-12-13-11-01-20 ADDED
@@ -0,0 +1,6 @@
+ 2022-12-13 11:01:20,042 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:01:20,042 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 512, 'encoder_dim': 512, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 7, 'avg': 1, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 3, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:01:20,042 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:01:20,042 INFO [test.py:107] About to create model
+ 2022-12-13 11:01:20,197 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:01:20,197 INFO [checkpoint.py:112] Loading checkpoint from transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt
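Note: model.py logs "Not tying weights" because 'tie_weights' is False in the parameter dump above. When enabled, weight tying shares the input embedding matrix with the output projection of the LM head. A minimal sketch of the idea in PyTorch (the module and its names are illustrative, not the actual icefall TransformerLM):

import torch.nn as nn

class TinyLM(nn.Module):
    def __init__(self, vocab_size: int, embedding_dim: int, tie_weights: bool):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.output = nn.Linear(embedding_dim, vocab_size, bias=False)
        if tie_weights:
            # Reuse the embedding matrix as the output projection weights,
            # removing vocab_size * embedding_dim parameters.
            self.output.weight = self.embed.weight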
exp/log-ppl/-2022-12-13-11-02-25 ADDED
@@ -0,0 +1,8 @@
+ 2022-12-13 11:02:25,312 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:02:25,312 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 7, 'avg': 1, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 3, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:02:25,313 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:02:25,313 INFO [test.py:107] About to create model
+ 2022-12-13 11:02:25,581 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:02:25,581 INFO [checkpoint.py:112] Loading checkpoint from transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt
+ 2022-12-13 11:02:32,425 INFO [test.py:138] Number of model parameters: 19665946
+ 2022-12-13 11:02:32,425 INFO [test.py:139] Number of model parameters (requires_grad): 19665946 (100.0%)
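The two parameter counts logged above come from standard PyTorch bookkeeping; a minimal sketch of how such numbers are typically computed (count_parameters is a hypothetical helper, not necessarily how test.py does it):

import torch.nn as nn

def count_parameters(model: nn.Module) -> None:
    # All parameters registered on the model.
    total = sum(p.numel() for p in model.parameters())
    # Parameters that would receive gradients during training.
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of model parameters: {total}")
    print(f"Number of model parameters (requires_grad): "
          f"{trainable} ({100.0 * trainable / total:.1f}%)")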
exp/log-ppl/-2022-12-13-11-02-51 ADDED
@@ -0,0 +1,8 @@
+ 2022-12-13 11:02:51,854 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:02:51,855 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 7, 'avg': 1, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:02:51,855 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:02:51,855 INFO [test.py:107] About to create model
+ 2022-12-13 11:02:52,295 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:02:52,295 INFO [checkpoint.py:112] Loading checkpoint from transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt
+ 2022-12-13 11:02:56,424 INFO [test.py:138] Number of model parameters: 98995638
+ 2022-12-13 11:02:56,424 INFO [test.py:139] Number of model parameters (requires_grad): 98995638 (100.0%)
exp/log-ppl/-2022-12-13-11-04-12 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-13 11:04:12,031 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:04:12,031 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 7, 'avg': 1, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:04:12,032 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:04:12,032 INFO [test.py:107] About to create model
+ 2022-12-13 11:04:12,535 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:04:12,535 INFO [checkpoint.py:112] Loading checkpoint from transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt
+ 2022-12-13 11:04:16,633 INFO [test.py:138] Number of model parameters: 98995638
+ 2022-12-13 11:04:16,633 INFO [test.py:139] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-13 11:04:16,634 INFO [test.py:145] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt
+ 2022-12-13 11:04:23,923 INFO [test.py:169] total nll: 480325.7807159424, num tokens: 211138, num sentences: 5559, ppl: 9.727
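The ppl value on the last line is the exponential of the average per-token negative log-likelihood. A quick arithmetic check against the logged totals (the reduction inside test.py is assumed to be exactly this standard formula):

import math

total_nll = 480325.7807159424   # summed negative log-likelihood from the log line above
num_tokens = 211138             # number of BPE tokens in the test set

ppl = math.exp(total_nll / num_tokens)
print(f"ppl: {ppl:.3f}")        # prints 9.727, matching the logged value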
exp/log-ppl/-2022-12-13-11-04-49 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-13 11:04:49,777 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:04:49,778 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 7, 'avg': 2, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:04:49,778 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:04:49,778 INFO [test.py:107] About to create model
+ 2022-12-13 11:04:50,218 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:04:50,218 INFO [test.py:128] averaging ['transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-6.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt']
+ 2022-12-13 11:05:00,842 INFO [test.py:138] Number of model parameters: 98995638
+ 2022-12-13 11:05:00,842 INFO [test.py:139] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-13 11:05:00,842 INFO [test.py:145] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt
+ 2022-12-13 11:05:07,559 INFO [test.py:169] total nll: 477445.6896972656, num tokens: 211138, num sentences: 5559, ppl: 9.596
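This run scores an element-wise average of the weights from epoch-6.pt and epoch-7.pt rather than a single checkpoint. A minimal sketch of that kind of averaging (written independently of icefall's own helper, and assuming each epoch-*.pt stores its weights under a 'model' key):

import torch

def average_model_weights(filenames, device="cpu"):
    # Accumulate the saved state dicts element-wise, then divide by the count.
    avg = torch.load(filenames[0], map_location=device)["model"]
    for f in filenames[1:]:
        state = torch.load(f, map_location=device)["model"]
        for k in avg:
            avg[k] = avg[k] + state[k]
    n = len(filenames)
    for k in avg:
        avg[k] = avg[k] / n if avg[k].is_floating_point() else avg[k] // n
    return avg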
exp/log-ppl/-2022-12-13-11-05-28 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-13 11:05:28,709 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:05:28,709 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 8, 'avg': 2, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:05:28,710 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:05:28,710 INFO [test.py:107] About to create model
+ 2022-12-13 11:05:29,147 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:05:29,147 INFO [test.py:128] averaging ['transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-8.pt']
+ 2022-12-13 11:05:35,810 INFO [test.py:138] Number of model parameters: 98995638
+ 2022-12-13 11:05:35,810 INFO [test.py:139] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-13 11:05:35,811 INFO [test.py:145] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt
+ 2022-12-13 11:05:42,507 INFO [test.py:169] total nll: 476117.77613830566, num tokens: 211138, num sentences: 5559, ppl: 9.535
exp/log-ppl/-2022-12-13-11-05-53 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-13 11:05:53,701 INFO [test.py:98] Computing perplexity started
+ 2022-12-13 11:05:53,701 INFO [test.py:99] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.18', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2d82a1d9350263ae48a6953034ce570e3d5208c1', 'k2-git-date': 'Mon Aug 15 02:09:05 2022', 'lhotse-version': '1.5.0', 'torch-version': '1.10.1', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.9', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': '7131129-dirty', 'icefall-git-date': 'Fri Dec 9 15:46:21 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_test/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0602201035-5fb6d86964-mclm7', 'IP address': '10.177.74.202'}, 'epoch': 8, 'avg': 3, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-13 11:05:53,702 INFO [test.py:105] Device: cuda:0
+ 2022-12-13 11:05:53,702 INFO [test.py:107] About to create model
+ 2022-12-13 11:05:54,136 INFO [model.py:68] Not tying weights
+ 2022-12-13 11:05:54,136 INFO [test.py:128] averaging ['transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-6.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-7.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-8.pt']
+ 2022-12-13 11:06:00,482 INFO [test.py:138] Number of model parameters: 98995638
+ 2022-12-13 11:06:00,482 INFO [test.py:139] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-13 11:06:00,482 INFO [test.py:145] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted_lm_data-test.pt
+ 2022-12-13 11:06:07,054 INFO [test.py:169] total nll: 475821.60218811035, num tokens: 211138, num sentences: 5559, ppl: 9.522
exp/log-ppl/-2022-12-14-11-25-53 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-14 11:25:53,118 INFO [compute_perplexity.py:117] Computing perplexity started
+ 2022-12-14 11:25:53,118 INFO [compute_perplexity.py:118] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.22', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '96c9a2aece2a3a7633da07740e24fa3d96f5498c', 'k2-git-date': 'Thu Nov 10 08:14:02 2022', 'lhotse-version': '1.10.0', 'torch-version': '1.12.1', 'torch-cuda-available': True, 'torch-cuda-version': '11.6', 'python-version': '3.8', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': 'e5ce80d-dirty', 'icefall-git-date': 'Wed Dec 14 10:40:00 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_latest/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_latest/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-1-0307195509-567fcb96d6-kdztg', 'IP address': '10.177.22.10'}, 'epoch': 11, 'avg': 4, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-14 11:25:53,119 INFO [compute_perplexity.py:124] Device: cuda:0
+ 2022-12-14 11:25:53,119 INFO [compute_perplexity.py:126] About to create model
+ 2022-12-14 11:25:53,546 INFO [model.py:68] Not tying weights
+ 2022-12-14 11:25:53,546 INFO [compute_perplexity.py:147] averaging ['transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-8.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-9.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-10.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-11.pt']
+ 2022-12-14 11:26:00,332 INFO [compute_perplexity.py:157] Number of model parameters: 98995638
+ 2022-12-14 11:26:00,332 INFO [compute_perplexity.py:158] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-14 11:26:00,332 INFO [compute_perplexity.py:164] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-test.pt
+ 2022-12-14 11:26:07,015 INFO [compute_perplexity.py:188] total nll: 473216.4806213379, num tokens: 211138, num sentences: 5559, ppl: 9.405
exp/log-ppl/-2022-12-14-11-36-18 ADDED
@@ -0,0 +1,10 @@
+ 2022-12-14 11:36:18,616 INFO [compute_perplexity.py:117] Computing perplexity started
+ 2022-12-14 11:36:18,616 INFO [compute_perplexity.py:118] {'max_sent_len': 100, 'sos_id': 1, 'eos_id': 1, 'blank_id': 0, 'lr': 0.001, 'weight_decay': 1e-06, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 200, 'reset_interval': 2000, 'valid_interval': 1000, 'nhead': 8, 'embedding_dim': 768, 'encoder_dim': 768, 'dim_feedforward': 2048, 'dropout': 0.1, 'env_info': {'k2-version': '1.22', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '96c9a2aece2a3a7633da07740e24fa3d96f5498c', 'k2-git-date': 'Thu Nov 10 08:14:02 2022', 'lhotse-version': '1.10.0', 'torch-version': '1.12.1', 'torch-cuda-available': True, 'torch-cuda-version': '11.6', 'python-version': '3.8', 'icefall-git-branch': 'transformer_lm', 'icefall-git-sha1': 'e5ce80d-dirty', 'icefall-git-date': 'Wed Dec 14 10:40:00 2022', 'icefall-path': '/ceph-data4/yangxiaoyu/softwares/icefall_development/icefall_transformer_lm', 'k2-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_latest/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/ceph-data4/yangxiaoyu/softwares/anaconda3/envs/k2_latest/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-1-0307195509-567fcb96d6-kdztg', 'IP address': '10.177.22.10'}, 'epoch': 11, 'avg': 3, 'exp_dir': PosixPath('transformer_lm/exp_full_libri_16layer_maxlen200_8gpu'), 'lm_data': PosixPath('transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-test.pt'), 'vocab_size': 500, 'num_layers': 16, 'tie_weights': False, 'batch_size': 50}
+ 2022-12-14 11:36:18,616 INFO [compute_perplexity.py:124] Device: cuda:0
+ 2022-12-14 11:36:18,616 INFO [compute_perplexity.py:126] About to create model
+ 2022-12-14 11:36:19,041 INFO [model.py:68] Not tying weights
+ 2022-12-14 11:36:19,041 INFO [compute_perplexity.py:147] averaging ['transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-9.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-10.pt', 'transformer_lm/exp_full_libri_16layer_maxlen200_8gpu/epoch-11.pt']
+ 2022-12-14 11:36:23,796 INFO [compute_perplexity.py:157] Number of model parameters: 98995638
+ 2022-12-14 11:36:23,796 INFO [compute_perplexity.py:158] Number of model parameters (requires_grad): 98995638 (100.0%)
+ 2022-12-14 11:36:23,797 INFO [compute_perplexity.py:164] Loading LM test data from transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-test.pt
+ 2022-12-14 11:36:30,579 INFO [compute_perplexity.py:188] total nll: 473318.6812133789, num tokens: 211138, num sentences: 5559, ppl: 9.410
exp/log/log-train-2022-12-09-10-39-23-0 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-1 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-2 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-3 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-4 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-5 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-6 ADDED
The diff for this file is too large to render. See raw diff
 
exp/log/log-train-2022-12-09-10-39-23-7 ADDED
The diff for this file is too large to render. See raw diff
 
exp/pretrained.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09114d2bca6f6e47de095879c59b811064f6ac02f0cc1c73fc38435323ed60ac
+ size 394578259
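The committed exp/pretrained.pt is a Git LFS pointer rather than the 394578259-byte checkpoint itself: the three lines above record the LFS spec version, the SHA-256 of the stored object, and its size in bytes. A small sketch of reading such a pointer (parse_lfs_pointer is a hypothetical helper, not part of this repo):

def parse_lfs_pointer(path):
    # An LFS pointer file is plain text made of "key value" lines.
    fields = {}
    with open(path) as f:
        for line in f:
            key, value = line.strip().split(" ", 1)
            fields[key] = value
    return fields

info = parse_lfs_pointer("exp/pretrained.pt")
print(info["oid"], info["size"])   # sha256:09114d2b... 394578259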
exp/tensorboard/events.out.tfevents.1670553563.de-74279-k2-train-1-0307195509-567fcb96d6-kdztg.3691056.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af2f425be9aec85771cdc566cd6d2480ab02b9d36e18ef0447ef030a9c75b520
+ size 1054953