sanchit-gandhi committed on
Commit 8506a48
1 Parent(s): 99f75ad

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +50 -0
  2. checkpoint-144-epoch-3/optimizer.bin +3 -0
  3. checkpoint-144-epoch-3/pytorch_model.bin +3 -0
  4. checkpoint-144-epoch-3/random_states_0.pkl +3 -0
  5. checkpoint-144-epoch-3/scheduler.bin +3 -0
  6. checkpoint-216-epoch-5/optimizer.bin +3 -0
  7. checkpoint-216-epoch-5/pytorch_model.bin +3 -0
  8. checkpoint-216-epoch-5/random_states_0.pkl +3 -0
  9. checkpoint-216-epoch-5/scheduler.bin +3 -0
  10. checkpoint-288-epoch-7/optimizer.bin +3 -0
  11. checkpoint-288-epoch-7/pytorch_model.bin +3 -0
  12. checkpoint-288-epoch-7/random_states_0.pkl +3 -0
  13. checkpoint-288-epoch-7/scheduler.bin +3 -0
  14. checkpoint-312-epoch-7/optimizer.bin +3 -0
  15. checkpoint-312-epoch-7/pytorch_model.bin +3 -0
  16. checkpoint-312-epoch-7/random_states_0.pkl +3 -0
  17. checkpoint-312-epoch-7/scheduler.bin +3 -0
  18. checkpoint-72-epoch-1/optimizer.bin +3 -0
  19. checkpoint-72-epoch-1/pytorch_model.bin +3 -0
  20. checkpoint-72-epoch-1/random_states_0.pkl +3 -0
  21. checkpoint-72-epoch-1/scheduler.bin +3 -0
  22. config.json +276 -0
  23. generation_config.json +12 -0
  24. model.safetensors +3 -0
  25. preprocessor_config.json +10 -0
  26. run.sh +52 -0
  27. run_parler_tts_training.py +1763 -0
  28. special_tokens_map.json +125 -0
  29. spiece.model +3 -0
  30. tokenizer.json +0 -0
  31. tokenizer_config.json +941 -0
  32. wandb/debug-cli.sanchit.log +0 -0
  33. wandb/debug-internal.log +0 -0
  34. wandb/debug.log +35 -0
  35. wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml +248 -0
  36. wandb/run-20240513_204644-y7fy6vtp/files/config.yaml +86 -0
  37. wandb/run-20240513_204644-y7fy6vtp/files/output.log +187 -0
  38. wandb/run-20240513_204644-y7fy6vtp/files/requirements.txt +225 -0
  39. wandb/run-20240513_204644-y7fy6vtp/files/wandb-metadata.json +804 -0
  40. wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json +1 -0
  41. wandb/run-20240513_204644-y7fy6vtp/logs/debug-internal.log +465 -0
  42. wandb/run-20240513_204644-y7fy6vtp/logs/debug.log +29 -0
  43. wandb/run-20240513_204644-y7fy6vtp/run-y7fy6vtp.wandb +0 -0
  44. wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml +248 -0
  45. wandb/run-20240513_205248-d781ddha/files/config.yaml +86 -0
  46. wandb/run-20240513_205248-d781ddha/files/output.log +576 -0
  47. wandb/run-20240513_205248-d781ddha/files/requirements.txt +225 -0
  48. wandb/run-20240513_205248-d781ddha/files/wandb-metadata.json +804 -0
  49. wandb/run-20240513_205248-d781ddha/files/wandb-summary.json +1 -0
  50. wandb/run-20240513_205248-d781ddha/logs/debug-internal.log +554 -0
.gitattributes CHANGED
@@ -33,3 +33,53 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_3f7a73b6eb939439853b.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_51c4feb7b25049501e37.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_6d818be44006fe311b25.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_7633a61e6ee80a024fb2.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_855fad46eee4554f15a7.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_dd8bdcb385062c83d9db.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_117_ea76ce7bbfab47610005.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_2abe63a219dd9980a311.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_69610571800405c8afc4.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_6cac4adea8fad0e75539.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_830d908cf01e69e53001.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_83726d791dfb671cadab.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_c0a3f3d7cd9b5e6f1ff3.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_195_c762fd4cdeb8dd603c93.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_07ddb3b15d19fc0d28b8.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_081798576741dcab4ed7.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_1c742d222224ecad309d.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_823c293a773a7747aae9.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_b9550b7eaa369565c70f.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_c4854e1ae7d92c3a58d5.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_234_e8462d5ee69db7355157.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_1b11908a3b16b03f6f28.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_623fa20cd98668542c5a.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_7e85e65244c43352d3ff.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_85b843d1a8a6ba22faee.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_89094caf461fb7b67ec7.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_8f6478a9918ad2b1ae55.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_273_9f4846c46a52d5e23c8d.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_01c26787b801562a3789.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_0ceabcd2840ac8898fe1.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_337071167ae9bad07b71.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_43fb61bde32a4b6742d4.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_47c9ee8dc54086885fe9.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_ac7b82097e1e55d1368c.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_312_f460b32c3554b8680b92.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_6481b4598446a5223728.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_65c5863263ecc0063e7c.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_75f77b4060ad41f6cd28.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_91ec273933f07274531b.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_a92c59803d84d120f9c8.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_e334e92b9d2007210460.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_39_e93fbf8d0314875d9a8a.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_2a1aa9c4aa9b28aeff3d.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_31d4a3506990aa644012.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_6f247990e446903b7c69.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_ccbbfb2e4ad764ed2d56.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_e0a9392514f4f0b15b68.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_e8936f475da43789ec82.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/files/media/audio/Speech[[:space:]]samples/eval_78_ff389c0afad3d276441d.wav filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240513_205708-e3sq5zz5/run-e3sq5zz5.wandb filter=lfs diff=lfs merge=lfs -text
checkpoint-144-epoch-3/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e728938a020748172a043132006766f4a9aea154dfa221b5849c69bc2c026c4
+ size 3652763351
checkpoint-144-epoch-3/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab339eb359286de4937c3ac9a9a0ae12209842b4166eba2d68fe723a6a438aa2
+ size 2588462170
checkpoint-144-epoch-3/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76d33f5216003698fa81aceb24693ae6ddf73c08c289065e1b1b011a817dd1f9
+ size 14408
checkpoint-144-epoch-3/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b79932bdc8b3cbc9e9b6af8b11344bc95b9d62b0a651d065e4da34269040e929
+ size 1000
checkpoint-216-epoch-5/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97cc912a330eb8c6ac1412de5c46e2b9733351a81bfba0066ce67ba7d9fdd9e5
+ size 3652763351
checkpoint-216-epoch-5/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d544b282e7baef80c090fb270560cee9f423f25d2b57600c847d432dcd914ef
+ size 2588462170
checkpoint-216-epoch-5/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c388279c01ff0f2d04a22cb8fd6e5570c6cb372c2116007d42c83b24ef684a3b
+ size 14344
checkpoint-216-epoch-5/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36f275707c92f62d3c3275afc2212e2c6176cdd14a425be9eb481c26b0a5ca7c
+ size 1000
checkpoint-288-epoch-7/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41cdea0eced4347c246a0e029cf82f4eb4bdc22ead7d900a856ec6cb9b7a898d
+ size 3652763351
checkpoint-288-epoch-7/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3fe89936ea1e9a28873323963f1143fb97b5a56c9fdbd0d8ec3a168f05be5c82
+ size 2588462170
checkpoint-288-epoch-7/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:820ccb9181b74dbaba665aae0c5a53a3b77169c67cb021111e89f95714d28e6e
+ size 14344
checkpoint-288-epoch-7/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:110faff40ae64c1ebce9c93b18f5c46479bb06243d71a1cb2b42895cff384963
+ size 1000
checkpoint-312-epoch-7/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e0e25761980181284316f2cb1aac59524475d4fbc6f9c69c3dab6034f16ceb2
+ size 3652763351
checkpoint-312-epoch-7/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a66f1ca1f6eb925ab73c9aac0f1238b913c05fb45f3c5efee7cbf04a92be5959
+ size 2588462170
checkpoint-312-epoch-7/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0fc40577c9284fcb8945a3ced5c05d62aa1f031cb88b316b2d9d467872270199
+ size 14408
checkpoint-312-epoch-7/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b2d636bd7cf005a271ad3d3b8a6af3546951cd0ccda822994dc2edbed5b1f2f
+ size 1000
checkpoint-72-epoch-1/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d6d2b1a649dd2c1bd454f3f83513e829024d8f3b7a439ec7cdb6682f23cfc6c
+ size 3652763351
checkpoint-72-epoch-1/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcbd40f4a0118dd67ad43790695b2692d6743c50409429188a60b6aa5a5dd523
+ size 2588462170
checkpoint-72-epoch-1/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ac1e338ab3a6a60c25d38c4e95428abf67cf039a45489509b3e03c86383ad2a
+ size 14408
checkpoint-72-epoch-1/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de20d262a5716e12bcc1621b1656e3c6356daf72b9e347d23c6b0423a694e28d
+ size 1000
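
Each checkpoint binary above is stored as a Git LFS pointer rather than the raw payload: three "key value" lines giving the spec version, the SHA-256 of the content, and its size in bytes. A minimal sketch of reading such a pointer (hypothetical helper, not part of this repo):

    def parse_lfs_pointer(path):
        # Parse the three "key value" lines of a Git LFS pointer file.
        with open(path) as f:
            fields = dict(line.strip().split(" ", 1) for line in f)
        oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
        return oid, int(fields["size"])

    # e.g. parse_lfs_pointer("checkpoint-72-epoch-1/scheduler.bin")[1] == 1000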
config.json ADDED
@@ -0,0 +1,276 @@
+ {
+   "_name_or_path": "parler-tts/parler_tts_mini_v0.1",
+   "architectures": [
+     "ParlerTTSForConditionalGeneration"
+   ],
+   "audio_encoder": {
+     "_name_or_path": "ylacombe/dac_44khZ_8kbps",
+     "add_cross_attention": false,
+     "architectures": [
+       "DACModel"
+     ],
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "codebook_size": 1024,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "frame_rate": 86,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "latent_dim": 1024,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_bitrate": 8,
+     "model_type": "dac",
+     "no_repeat_ngram_size": 0,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_codebooks": 9,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sampling_rate": 44100,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": "float32",
+     "torchscript": false,
+     "typical_p": 1.0,
+     "use_bfloat16": false
+   },
+   "decoder": {
+     "_name_or_path": "/fsx/yoach/tmp/artefacts/decoder_400M/",
+     "activation_dropout": 0.0,
+     "activation_function": "gelu",
+     "add_cross_attention": true,
+     "architectures": [
+       "ParlerTTSForCausalLM"
+     ],
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 1025,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.1,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 1024,
+     "exponential_decay_length_penalty": null,
+     "ffn_dim": 4096,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_size": 1024,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 0.02,
+     "is_decoder": true,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layerdrop": 0.0,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 4096,
+     "min_length": 0,
+     "model_type": "parler_tts_decoder",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 16,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_codebooks": 9,
+     "num_hidden_layers": 24,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1024,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "scale_embedding": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": false,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": "float32",
+     "torchscript": false,
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "use_cache": true,
+     "vocab_size": 1088
+   },
+   "decoder_start_token_id": 1025,
+   "is_encoder_decoder": true,
+   "model_type": "parler_tts",
+   "pad_token_id": 1024,
+   "text_encoder": {
+     "_name_or_path": "google/flan-t5-base",
+     "add_cross_attention": false,
+     "architectures": [
+       "T5ForConditionalGeneration"
+     ],
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "classifier_dropout": 0.0,
+     "cross_attention_hidden_size": null,
+     "d_ff": 2048,
+     "d_kv": 64,
+     "d_model": 768,
+     "decoder_start_token_id": 0,
+     "dense_act_fn": "gelu_new",
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout_rate": 0.1,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 1,
+     "exponential_decay_length_penalty": null,
+     "feed_forward_proj": "gated-gelu",
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 1.0,
+     "is_decoder": false,
+     "is_encoder_decoder": true,
+     "is_gated_act": true,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_epsilon": 1e-06,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "t5",
+     "n_positions": 512,
+     "no_repeat_ngram_size": 0,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_decoder_layers": 12,
+     "num_heads": 12,
+     "num_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_past": true,
+     "output_scores": false,
+     "pad_token_id": 0,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "relative_attention_max_distance": 128,
+     "relative_attention_num_buckets": 32,
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": {
+       "summarization": {
+         "early_stopping": true,
+         "length_penalty": 2.0,
+         "max_length": 200,
+         "min_length": 30,
+         "no_repeat_ngram_size": 3,
+         "num_beams": 4,
+         "prefix": "summarize: "
+       },
+       "translation_en_to_de": {
+         "early_stopping": true,
+         "max_length": 300,
+         "num_beams": 4,
+         "prefix": "translate English to German: "
+       },
+       "translation_en_to_fr": {
+         "early_stopping": true,
+         "max_length": 300,
+         "num_beams": 4,
+         "prefix": "translate English to French: "
+       },
+       "translation_en_to_ro": {
+         "early_stopping": true,
+         "max_length": 300,
+         "num_beams": 4,
+         "prefix": "translate English to Romanian: "
+       }
+     },
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": false,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "use_cache": true,
+     "vocab_size": 32128
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.0.dev0",
+   "vocab_size": 32128
+ }
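
The config nests three sub-configs: a DAC audio encoder (44.1 kHz, 9 codebooks at a frame rate of 86 Hz), a 24-layer Parler-TTS decoder, and a Flan-T5-base text encoder. A hedged loading sketch using the classes that run_parler_tts_training.py imports (assumes the parler_tts package is installed and this repo is the working directory):

    from parler_tts import ParlerTTSConfig, ParlerTTSForConditionalGeneration

    config = ParlerTTSConfig.from_pretrained("./")  # parses the config.json above
    model = ParlerTTSForConditionalGeneration.from_pretrained("./")  # weights from model.safetensors
    print(config.audio_encoder.frame_rate, config.decoder.num_codebooks)  # 86 9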
generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1025,
+   "decoder_start_token_id": 1025,
+   "do_sample": true,
+   "eos_token_id": 1024,
+   "guidance_scale": 1.0,
+   "max_length": 2580,
+   "min_new_tokens": 50,
+   "pad_token_id": 1024,
+   "transformers_version": "4.41.0.dev0"
+ }
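
The max_length of 2580 is not arbitrary: at the DAC frame rate of 86 Hz from config.json, it corresponds to the 30-second cap set by --max_duration_in_seconds in run.sh below:

    frame_rate = 86        # audio_encoder.frame_rate in config.json
    max_duration = 30.0    # --max_duration_in_seconds in run.sh
    assert int(frame_rate * max_duration) == 2580  # generation max_length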
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e39490720427289907fbf94d35a0f3d3723fee73e27bd4f09de51715d4d1eb3
+ size 2588215392
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "chunk_length_s": null,
+   "feature_extractor_type": "EncodecFeatureExtractor",
+   "feature_size": 1,
+   "overlap": null,
+   "padding_side": "right",
+   "padding_value": 0.0,
+   "return_attention_mask": true,
+   "sampling_rate": 44100
+ }
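
This is the feature extractor the training script loads via AutoFeatureExtractor (the DAC model reuses the EncodecFeatureExtractor class, as the config above shows). A hedged sketch, assuming this repo as the local path:

    from transformers import AutoFeatureExtractor

    feature_extractor = AutoFeatureExtractor.from_pretrained("./")
    print(feature_extractor.sampling_rate)  # 44100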
run.sh ADDED
@@ -0,0 +1,52 @@
+ accelerate launch ./run_parler_tts_training.py \
+     --model_name_or_path "parler-tts/parler_tts_mini_v0.1" \
+     --feature_extractor_name "parler-tts/dac_44khZ_8kbps" \
+     --description_tokenizer_name "parler-tts/parler_tts_mini_v0.1" \
+     --prompt_tokenizer_name "parler-tts/parler_tts_mini_v0.1" \
+     --report_to "wandb" \
+     --overwrite_output_dir true \
+     --train_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal" \
+     --train_metadata_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral" \
+     --train_dataset_config_name "read" \
+     --train_split_name "train[:-35]" \
+     --eval_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal" \
+     --eval_metadata_dataset_name "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral" \
+     --eval_dataset_config_name "read" \
+     --eval_split_name "train" \
+     --max_eval_samples 8 \
+     --per_device_eval_batch_size 16 \
+     --target_audio_column_name "audio" \
+     --description_column_name "text_description" \
+     --prompt_column_name "text" \
+     --max_duration_in_seconds 30.0 \
+     --min_duration_in_seconds 2.0 \
+     --max_text_length 400 \
+     --preprocessing_num_workers 2 \
+     --do_train true \
+     --num_train_epochs 8 \
+     --gradient_accumulation_steps 8 \
+     --gradient_checkpointing true \
+     --per_device_train_batch_size 16 \
+     --learning_rate 0.00008 \
+     --adam_beta1 0.9 \
+     --adam_beta2 0.99 \
+     --weight_decay 0.01 \
+     --lr_scheduler_type "cosine" \
+     --warmup_steps 250 \
+     --logging_steps 2 \
+     --freeze_text_encoder true \
+     --audio_encoder_per_device_batch_size 4 \
+     --dtype "bfloat16" \
+     --seed 456 \
+     --output_dir "./" \
+     --temporary_save_to_disk "../audio_code_tmp_concat/" \
+     --save_to_disk "../tmp_dataset_audio_concat/" \
+     --dataloader_num_workers 4 \
+     --do_eval \
+     --predict_with_generate \
+     --include_inputs_for_metrics \
+     --save_strategy "steps" \
+     --save_steps 72 \
+     --evaluation_strategy "epoch" \
+     --save_total_limit 5 \
+     --group_by_length true
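
For reference, the flags above imply an effective batch of per_device_train_batch_size × gradient_accumulation_steps × num_GPUs per optimizer step; the GPU count is not recorded in this commit, so assuming a single GPU:

    per_device, grad_accum, num_gpus = 16, 8, 1  # num_gpus is an assumption
    print(per_device * grad_accum * num_gpus)    # 128 samples per optimizer step

At that cadence the run reaches 312 optimizer steps over the 8 epochs (roughly 39 steps per epoch), which matches the checkpoint-312-epoch-7 directory uploaded in this commit.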
run_parler_tts_training.py ADDED
@@ -0,0 +1,1763 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """ Train Parler-TTS using 🤗 Accelerate"""
+
+ import logging
+ import os
+ import re
+ import shutil
+ import sys
+ import time
+ from dataclasses import dataclass, field
+ from datetime import timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Set, Union
+
+ import datasets
+ import evaluate
+ import numpy as np
+ import torch
+ import transformers
+ from accelerate import Accelerator
+ from accelerate.utils import AutocastKwargs, InitProcessGroupKwargs, TorchDynamoPlugin, set_seed
+ from accelerate.utils.memory import release_memory
+ from datasets import Dataset, DatasetDict, IterableDataset, concatenate_datasets, interleave_datasets, load_dataset
+ from huggingface_hub import Repository, create_repo
+ from multiprocess import set_start_method
+ from torch.utils.data import DataLoader
+ from tqdm import tqdm
+ from transformers import (
+     AutoFeatureExtractor,
+     AutoModel,
+     AutoProcessor,
+     AutoTokenizer,
+     HfArgumentParser,
+     Seq2SeqTrainingArguments,
+     pipeline,
+ )
+ from transformers.optimization import get_scheduler
+ from transformers.trainer_pt_utils import LengthGroupedSampler
+ from transformers.utils import send_example_telemetry
+ from wandb import Audio
+
+ from parler_tts import (
+     ParlerTTSConfig,
+     ParlerTTSForConditionalGeneration,
+     build_delay_pattern_mask,
+ )
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def list_field(default=None, metadata=None):
+     return field(default_factory=lambda: default, metadata=metadata)
+
+
+ _RE_CHECKPOINT = re.compile(r"^checkpoint-(\d+)-epoch-(\d+)$")
+
+
+ def get_last_checkpoint(folder):
+     content = os.listdir(folder)
+     checkpoints = [
+         path
+         for path in content
+         if _RE_CHECKPOINT.search(path) is not None and os.path.isdir(os.path.join(folder, path))
+     ]
+     if len(checkpoints) == 0:
+         return
+     return os.path.join(folder, max(checkpoints, key=lambda x: int(_RE_CHECKPOINT.search(x).groups()[0])))
+
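
For orientation, the regex above matches exactly the checkpoint directories uploaded in this commit; a hedged usage sketch, assuming this repo as the output folder:

    # _RE_CHECKPOINT.search("checkpoint-312-epoch-7").groups() -> ("312", "7")
    # get_last_checkpoint(".") -> "./checkpoint-312-epoch-7"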
+
+ def sorted_checkpoints(output_dir=None, checkpoint_prefix="checkpoint") -> List[str]:
+     """Helper function to sort saved checkpoints from oldest to newest."""
+     ordering_and_checkpoint_path = []
+
+     glob_checkpoints = [str(x) for x in Path(output_dir).glob(f"{checkpoint_prefix}-*") if os.path.isdir(x)]
+
+     for path in glob_checkpoints:
+         regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
+         if regex_match is not None and regex_match.groups() is not None:
+             ordering_and_checkpoint_path.append((int(regex_match.groups()[0]), path))
+
+     checkpoints_sorted = sorted(ordering_and_checkpoint_path)
+     checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted]
+     return checkpoints_sorted
+
+
+ def rotate_checkpoints(save_total_limit=None, output_dir=None, checkpoint_prefix="checkpoint") -> None:
+     """Helper function to delete old checkpoints."""
+     if save_total_limit is None or save_total_limit <= 0:
+         return
+     # Check if we should delete older checkpoint(s)
+     checkpoints_sorted = sorted_checkpoints(output_dir=output_dir, checkpoint_prefix=checkpoint_prefix)
+     if len(checkpoints_sorted) <= save_total_limit:
+         return
+
+     number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
+     checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
+     for checkpoint in checkpoints_to_be_deleted:
+         logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
+         shutil.rmtree(checkpoint, ignore_errors=True)
+
+
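
With --save_total_limit 5 from run.sh, a sixth saved checkpoint triggers deletion of the oldest; a hedged sketch:

    # sorted_checkpoints(".") orders checkpoint-72-... first, checkpoint-312-... last
    rotate_checkpoints(save_total_limit=5, output_dir=".")  # no-op while only 5 exist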
+ def log_metric(
+     accelerator,
+     metrics: Dict,
+     train_time: float,
+     step: int,
+     epoch: int,
+     learning_rate: float = None,
+     prefix: str = "train",
+ ):
+     """Helper function to log all training/evaluation metrics with the correct prefixes and styling."""
+     log_metrics = {}
+     for k, v in metrics.items():
+         log_metrics[f"{prefix}/{k}"] = v
+     log_metrics[f"{prefix}/time"] = train_time
+     log_metrics[f"{prefix}/epoch"] = epoch
+     if learning_rate is not None:
+         log_metrics[f"{prefix}/learning_rate"] = learning_rate
+     accelerator.log(log_metrics, step=step)
+
+
+ def log_pred(
+     accelerator,
+     pred_descriptions: List[str],
+     pred_prompts: List[str],
+     transcriptions: List[str],
+     audios: List[torch.Tensor],
+     sampling_rate: int,
+     step: int,
+     prefix: str = "eval",
+     num_lines: int = 200000,
+ ):
+     """Helper function to log target/predicted transcriptions to weights and biases (wandb)."""
+     if accelerator.is_main_process:
+         wandb_tracker = accelerator.get_tracker("wandb")
+         # pretty name for current step: step 50000 -> step 50k
+         cur_step_pretty = f"{int(step // 1000)}k" if step > 1000 else step
+         prefix_pretty = prefix.replace("/", "-")
+
+         # convert str data to a wandb compatible format
+         str_data = [[pred_descriptions[i], pred_prompts[i], transcriptions[i]] for i in range(len(pred_descriptions))]
+         # log as a table with the appropriate headers
+         wandb_tracker.log_table(
+             table_name=f"predictions/{prefix_pretty}-step-{cur_step_pretty}",
+             columns=["Target descriptions", "Target prompts", "Predicted transcriptions"],
+             data=str_data[:num_lines],
+             step=step,
+             commit=False,
+         )
+
+         # wandb can only load 100 audios per step
+         wandb_tracker.log(
+             {
+                 f"Speech samples/{prefix}": [
+                     Audio(
+                         audio,
+                         caption=f"{pred_prompts[i]} --- DESCRIPTION: {pred_descriptions[i]}",
+                         sample_rate=sampling_rate,
+                     )
+                     for (i, audio) in enumerate(audios[: min(len(audios), 100)])
+                 ]
+             },
+             step=step,
+         )
+
+
+ @dataclass
+ class ModelArguments:
+     """
+     Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+     """
+
+     model_name_or_path: str = field(
+         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+     )
+     config_name: Optional[str] = field(
+         default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+     )
+     feature_extractor_name: Optional[str] = field(
+         default=None, metadata={"help": "Pretrained feature extractor name or path if not the same as model_name"}
+     )
+     description_tokenizer_name: Optional[str] = field(
+         default=None, metadata={"help": "Pretrained description tokenizer name or path if not the same as model_name"}
+     )
+     prompt_tokenizer_name: Optional[str] = field(
+         default=None,
+         metadata={"help": "Pretrained prompt tokenizer name or path if not the same as description_tokenizer_name"},
+     )
+     cache_dir: Optional[str] = field(
+         default=None,
+         metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
+     )
+     use_fast_tokenizer: bool = field(
+         default=True,
+         metadata={"help": "Whether to use one of the fast tokenizers (backed by the tokenizers library) or not."},
+     )
+     model_revision: str = field(
+         default="main",
+         metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
+     )
+     pad_token_id: int = field(
+         default=None,
+         metadata={"help": "If specified, change the model pad token id."},
+     )
+     decoder_start_token_id: int = field(
+         default=None,
+         metadata={"help": "If specified, change the model decoder start token id."},
+     )
+     freeze_text_encoder: bool = field(
+         default=False,
+         metadata={"help": "Whether to freeze the text encoder."},
+     )
+     do_sample: bool = field(
+         default=True,
+         metadata={"help": "Whether to do sampling or greedy decoding."},
+     )
+     temperature: float = field(
+         default=1.0,
+         metadata={"help": "Temperature if sampling."},
+     )
+     max_length: int = field(
+         default=2580,
+         metadata={"help": "Generation max length."},
+     )
+     bandwidth: float = field(
+         default=6,
+         metadata={"help": "Audio encoder bandwidth."},
+     )
+     asr_model_name_or_path: str = field(
+         default="distil-whisper/distil-large-v2",
+         metadata={
+             "help": "Used to compute WER during evaluation. Path to pretrained model or model identifier from huggingface.co/models"
+         },
+     )
+     clap_model_name_or_path: str = field(
+         default="laion/larger_clap_music_and_speech",
+         metadata={
+             "help": "Used to compute audio similarity during evaluation. Path to pretrained model or model identifier from huggingface.co/models"
+         },
+     )
+
+
+ @dataclass
+ class DataTrainingArguments:
+     """
+     Arguments pertaining to what data we are going to input our model for training and eval.
+
+     Using `HfArgumentParser` we can turn this class
+     into argparse arguments to be able to specify them on
+     the command line.
+     """
+
+     train_dataset_name: str = field(
+         default=None,
+         metadata={
+             "help": "The name of the training dataset to use (via the datasets library). Load and combine "
+             "multiple datasets by separating dataset ids by a '+' symbol. For example, to load and combine "
+             "librispeech and common voice, set `train_dataset_name='librispeech_asr+common_voice'`."
+         },
+     )
+     train_dataset_config_name: Optional[str] = field(
+         default=None,
+         metadata={
+             "help": "The configuration name of the training dataset to use (via the datasets library). Load and combine "
+             "multiple datasets by separating dataset configs by a '+' symbol."
+         },
+     )
+     train_split_name: str = field(
+         default="train",
+         metadata={
+             "help": ("The name of the training data set split to use (via the datasets library). Defaults to 'train'")
+         },
+     )
+     train_dataset_samples: str = field(
+         default=None,
+         metadata={
+             "help": "Number of samples in the training data. Load and combine "
+             "multiple datasets by separating dataset samples by a '+' symbol."
+         },
+     )
+     train_metadata_dataset_name: str = field(
+         default=None,
+         metadata={
+             "help": "The name of the metadata training dataset to use (via the datasets library). Load and combine "
+             "multiple datasets by separating dataset ids by a '+' symbol. For example, to load and combine "
+             "librispeech and common voice, set `train_dataset_name='librispeech_asr+common_voice'`."
+         },
+     )
+     eval_dataset_name: str = field(
+         default=None,
+         metadata={
+             "help": "The name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset name if unspecified."
+         },
+     )
+     eval_dataset_config_name: Optional[str] = field(
+         default=None,
+         metadata={
+             "help": "The configuration name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset config name if unspecified."
+         },
+     )
+     eval_split_name: str = field(
+         default="test",
+         metadata={
+             "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
+         },
+     )
+     eval_metadata_dataset_name: str = field(
+         default=None,
+         metadata={
+             "help": "The name of the metadata evaluation dataset to use (via the datasets library). Load and combine "
+             "multiple datasets by separating dataset ids by a '+' symbol."
+         },
+     )
+     target_audio_column_name: str = field(
+         default="audio",
+         metadata={"help": "The name of the dataset column containing the target audio data. Defaults to 'audio'"},
+     )
+     description_column_name: str = field(
+         default=None,
+         metadata={"help": "The name of the dataset column containing the description text data. Defaults to 'None'."},
+     )
+     prompt_column_name: str = field(
+         default=None,
+         metadata={"help": "The name of the dataset column containing the prompt text data. Defaults to 'None'."},
+     )
+     overwrite_cache: bool = field(
+         default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
+     )
+     preprocessing_num_workers: Optional[int] = field(
+         default=None,
+         metadata={"help": "The number of processes to use for the preprocessing."},
+     )
+     max_train_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": (
+                 "For debugging purposes or quicker training, truncate the number of training examples to this "
+                 "value if set."
+             )
+         },
+     )
+     max_eval_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": (
+                 "For debugging purposes or quicker training, truncate the number of validation examples to this "
+                 "value if set."
+             )
+         },
+     )
+     max_duration_in_seconds: float = field(
+         default=35.0,
+         metadata={
+             "help": (
+                 "Filter audio files that are longer than `max_duration_in_seconds` seconds. "
+                 "Also used to set the maximum audio length if `pad_to_max_length=True`."
+             )
+         },
+     )
+     min_duration_in_seconds: float = field(
+         default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+     )
+     max_text_length: int = field(
+         default=500, metadata={"help": "If set, max description lengths in number of characters."}
+     )
+     max_prompt_token_length: int = field(
+         default=None,
+         metadata={
+             "help": (
+                 "If set, filter samples with prompts that are longer than `max_prompt_token_length` tokens. "
+                 "Also used to set the maximum prompt token length if `pad_to_max_length=True`."
+             )
+         },
+     )
+     max_description_token_length: int = field(
+         default=None,
+         metadata={
+             "help": (
+                 "If set, filter samples with descriptions that are longer than `max_description_token_length` tokens. "
+                 "Also used to set the maximum description token length if `pad_to_max_length=True`."
+             )
+         },
+     )
+     pad_to_max_length: bool = field(
+         default=False,
+         metadata={
+             "help": (
+                 "If `True`, pad audio, prompt and description to a maximum length set with respectively "
+                 "`max_duration_in_seconds`, `max_prompt_token_length`, `max_description_token_length`."
+             )
+         },
+     )
+     preprocessing_only: bool = field(
+         default=False,
+         metadata={
+             "help": (
+                 "Whether to only do data preprocessing and skip training. This is especially useful when data"
+                 " preprocessing errors out in distributed training due to timeout. In this case, one should run the"
+                 " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets"
+                 " can consequently be loaded in distributed training."
+                 " In this training script, `save_to_disk` must be set to the path in which the dataset should be saved."
+             )
+         },
+     )
+     token: str = field(
+         default=None,
+         metadata={
+             "help": (
+                 "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+                 "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+             )
+         },
+     )
+     use_auth_token: bool = field(
+         default=None,
+         metadata={
+             "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
+         },
+     )
+     trust_remote_code: bool = field(
+         default=False,
+         metadata={
+             "help": (
+                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+                 "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+                 "execute code present on the Hub on your local machine."
+             )
+         },
+     )
+     add_audio_samples_to_wandb: bool = field(
+         default=False,
+         metadata={"help": "If set and if `wandb` in args.report_to, will add generated audio samples to wandb logs."},
+     )
+     id_column_name: str = field(default=None, metadata={"help": "id column name."})
+     wandb_project: str = field(
+         default="parler-speech",
+         metadata={"help": "The name of the wandb project."},
+     )
+     save_to_disk: str = field(
+         default=None,
+         metadata={
+             "help": "If set, will save the dataset to this path if this is an empty folder. If not empty, will load the datasets from it."
+         },
+     )
+     temporary_save_to_disk: str = field(default=None, metadata={"help": "Temporarily save audio labels here."})
+     pad_to_multiple_of: Optional[int] = field(
+         default=2,
+         metadata={"help": ("Pad to multiple of for tokenizers.")},
+     )
+
+
+ @dataclass
+ class ParlerTTSTrainingArguments(Seq2SeqTrainingArguments):
+     dtype: Optional[str] = field(
+         default="float32",
+         metadata={
+             "help": (
+                 "The data type (dtype) in which to run training. One of `float32` (full-precision), "
+                 "`float16` or `bfloat16` (both half-precision)."
+             )
+         },
+     )
+     audio_encoder_per_device_batch_size: int = field(
+         default=8,
+         metadata={"help": ("Specify the batch size of the audio encoding pre-processing steps.")},
+     )
+
+
+ @dataclass
+ class DataCollatorEncodecWithPadding:
+     """
+     Data collator that will dynamically pad the inputs received to the longest sequence in the batch or
+     to `max_length` if `max_length` is set and `padding=max_length`.
+     """
+
+     feature_extractor: AutoFeatureExtractor
+     audio_column_name: str
+     feature_extractor_input_name: Optional[str] = "input_values"
+     max_length: Optional[int] = None
+     padding: Optional[str] = "longest"
+
+     def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+         # split inputs and labels since they have to be of different lengths and need
+         # different padding methods
+         audios = [feature[self.audio_column_name]["array"] for feature in features]
+         len_audio = [len(audio) for audio in audios]
+
+         batch = self.feature_extractor(
+             audios,
+             return_tensors="pt",
+             padding=self.padding,
+             max_length=self.max_length,
+             sampling_rate=self.feature_extractor.sampling_rate,
+         )
+         batch["len_audio"] = torch.tensor(len_audio).unsqueeze(1)
+         return batch
+
+
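
A hedged sketch of wiring this collator into a DataLoader, using the imports at the top of the script and the DAC feature extractor named in run.sh (the raw_dataset variable is assumed):

    feature_extractor = AutoFeatureExtractor.from_pretrained("parler-tts/dac_44khZ_8kbps")
    encoder_collator = DataCollatorEncodecWithPadding(
        feature_extractor=feature_extractor, audio_column_name="audio"
    )
    loader = DataLoader(raw_dataset, collate_fn=encoder_collator, batch_size=4)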
+ @dataclass
+ class DataCollatorParlerTTSWithPadding:
+     """
+     Data collator that will dynamically pad the inputs received.
+     Args:
+         prompt_tokenizer (:class:`~transformers.AutoTokenizer`)
+             The prompt_tokenizer used for processing the data.
+         description_tokenizer (:class:`~transformers.AutoTokenizer`)
+             The description_tokenizer used for processing the data.
+         padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
+             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
+             among:
+             * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+               sequence is provided).
+             * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
+               maximum acceptable input length for the model if that argument is not provided.
+             * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
+               different lengths).
+         pad_to_multiple_of (:obj:`int`, `optional`):
+             If set will pad the sequence to a multiple of the provided value.
+             This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
+             7.5 (Volta).
+     """
+
+     prompt_tokenizer: AutoTokenizer
+     description_tokenizer: AutoTokenizer
+     padding: Union[bool, str] = "longest"
+     pad_to_multiple_of: Optional[int] = None
+     prompt_max_length: Optional[int] = None
+     description_max_length: Optional[int] = None
+     audio_max_length: Optional[int] = None
+
+     def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+         # split inputs and labels since they have to be of different lengths and need
+         # different padding methods
+
+         labels = [torch.tensor(feature["labels"]).transpose(0, 1) for feature in features]
+         # (bsz, seq_len, num_codebooks)
+         labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=-100)
+         if self.audio_max_length is not None and self.padding == "max_length":
+             labels = torch.nn.functional.pad(labels, pad=(0, 0, 0, max(self.audio_max_length - labels.shape[1], 0)))
+
+         input_ids = [{"input_ids": feature["input_ids"]} for feature in features]
+
+         input_ids = self.description_tokenizer.pad(
+             input_ids,
+             return_tensors="pt",
+             padding=self.padding,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             max_length=self.description_max_length,
+         )
+
+         batch = {"labels": labels, **input_ids}
+
+         if self.audio_max_length is not None and self.padding == "max_length":
+             # if we do torch.compile, we need to also specify the attention_mask
+             decoder_attention_mask = torch.ones(labels.shape[:2], dtype=input_ids["attention_mask"].dtype)
+             batch["decoder_attention_mask"] = decoder_attention_mask
+
+         prompt_input_ids = [{"input_ids": feature["prompt_input_ids"]} for feature in features]
+         prompt_input_ids = self.prompt_tokenizer.pad(
+             prompt_input_ids,
+             return_tensors="pt",
+             padding=self.padding,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             max_length=self.prompt_max_length,
+         )
+
+         batch["prompt_input_ids"] = prompt_input_ids["input_ids"]
+         if "attention_mask" in prompt_input_ids:
+             batch["prompt_attention_mask"] = prompt_input_ids["attention_mask"]
+
+         return batch
+
+
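
The labels are padded with -100 so padded frames are ignored by the loss; a minimal illustration of the pad_sequence call this relies on:

    import torch
    a, b = torch.zeros(3, 9), torch.zeros(5, 9)  # (seq_len, num_codebooks)
    padded = torch.nn.utils.rnn.pad_sequence([a, b], batch_first=True, padding_value=-100)
    print(padded.shape)  # torch.Size([2, 5, 9])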
+ def convert_dataset_str_to_list(
+     dataset_names,
+     dataset_config_names,
+     metadata_dataset_names=None,
+     splits=None,
+     dataset_samples=None,
+     default_split="train",
+ ):
+     if isinstance(dataset_names, str):
+         dataset_names = dataset_names.split("+")
+         dataset_config_names = dataset_config_names.split("+")
+         splits = splits.split("+") if splits is not None else None
+         dataset_samples = dataset_samples.split("+") if dataset_samples is not None else None
+         metadata_dataset_names = metadata_dataset_names.split("+") if metadata_dataset_names is not None else None
+
+     # basic checks to ensure we've got the right number of datasets/configs/splits/columns/probs
+     if len(dataset_names) != len(dataset_config_names):
+         raise ValueError(
+             f"Ensure one config is passed for each dataset, got {len(dataset_names)} datasets and"
+             f" {len(dataset_config_names)} configs."
+         )
+
+     if splits is not None and len(splits) != len(dataset_names):
+         raise ValueError(
+             f"Ensure one split is passed for each dataset, got {len(dataset_names)} datasets and {len(splits)} splits."
+         )
+
+     if metadata_dataset_names is not None and len(metadata_dataset_names) != len(dataset_names):
+         raise ValueError(
+             f"Ensure one metadata dataset is passed for each dataset, got {len(dataset_names)} datasets and {len(metadata_dataset_names)} metadata datasets."
+         )
+
+     if dataset_samples is not None:
+         if len(dataset_samples) != len(dataset_names):
+             raise ValueError(
+                 f"Ensure one sample is passed for each dataset, got {len(dataset_names)} datasets and "
+                 f"{len(dataset_samples)} samples."
+             )
+         dataset_samples = [float(ds_sample) for ds_sample in dataset_samples]
+     else:
+         dataset_samples = [None] * len(dataset_names)
+
+     splits = splits if splits is not None else [default_split for _ in range(len(dataset_names))]
+     # guard against a missing metadata list, since it is indexed per-dataset below
+     metadata_dataset_names = (
+         metadata_dataset_names if metadata_dataset_names is not None else [None] * len(dataset_names)
+     )
+
+     dataset_names_dict = []
+     for i, ds_name in enumerate(dataset_names):
+         dataset_names_dict.append(
+             {
+                 "name": ds_name,
+                 "config": dataset_config_names[i],
+                 "split": splits[i],
+                 "metadata_dataset_name": metadata_dataset_names[i],
+                 "samples": dataset_samples[i],
+             }
+         )
+     return dataset_names_dict
+
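
A hedged sketch of the '+' syntax this helper implements (the dataset ids are the illustrative ones from the help strings above; the metadata names are hypothetical):

    convert_dataset_str_to_list(
        "librispeech_asr+common_voice", "clean+en",
        metadata_dataset_names="meta_a+meta_b", splits="train+train",
    )
    # -> [{"name": "librispeech_asr", "config": "clean", "split": "train",
    #      "metadata_dataset_name": "meta_a", "samples": None}, ...]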
643
+ def load_multiple_datasets(
644
+ accelerator: Accelerator,
645
+ dataset_names: Union[List, str],
646
+ dataset_config_names: Union[List, str],
647
+ metadata_dataset_names: Optional[str] = None,
648
+ splits: Optional[Union[List, str]] = None,
649
+ label_column_names: Optional[List] = None,
650
+ stopping_strategy: Optional[str] = "first_exhausted",
651
+ dataset_samples: Optional[Union[List, np.array]] = None,
652
+ streaming: Optional[bool] = False,
653
+ seed: Optional[int] = None,
654
+ id_column_name: Optional[str] = None,
655
+ columns_to_keep: Optional[Set[str]] = None,
656
+ prompt_column_name: Optional[str] = None,
657
+ sampling_rate: Optional[int] = None,
658
+ audio_column_name: Optional[str] = None,
659
+ **kwargs,
660
+ ) -> Union[Dataset, IterableDataset]:
661
+ dataset_names_dict = convert_dataset_str_to_list(
662
+ dataset_names, dataset_config_names, metadata_dataset_names, splits, label_column_names, dataset_samples
663
+ )
664
+
665
+ if dataset_samples is not None:
666
+ dataset_samples = [ds_dict["samples"] for ds_dict in dataset_names_dict]
667
+ probabilities = np.array(dataset_samples) / np.sum(dataset_samples)
668
+ else:
669
+ probabilities = None
670
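+ # NOTE (added for illustration, not in the original script): with dataset_samples of
+ # [100, 300], probabilities becomes [0.25, 0.75], i.e. each dataset is sampled in
+ # proportion to its declared number of samples when interleaving.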
+
671
+ all_datasets = []
672
+ # iterate over the datasets we want to interleave
673
+ for dataset_dict in tqdm(dataset_names_dict, desc="Combining datasets..."):
674
+ with accelerator.main_process_first():
675
+ dataset = load_dataset(
676
+ dataset_dict["name"],
677
+ dataset_dict["config"],
678
+ split=dataset_dict["split"],
679
+ streaming=streaming,
680
+ **kwargs,
681
+ )
682
+ dataset_features = dataset.features.keys()
683
+
684
+ if sampling_rate is not None and audio_column_name is not None:
685
+ # resample target audio
686
+ dataset = dataset.cast_column(audio_column_name, datasets.features.Audio(sampling_rate=sampling_rate))
687
+
688
+ metadata_dataset_name = dataset_dict["metadata_dataset_name"]
689
+ if metadata_dataset_name is not None:
690
+ logger.info(
691
+ f'Merging {dataset_dict["name"]} - {dataset_dict["split"]} with {metadata_dataset_name} - {dataset_dict["split"]}'
692
+ )
693
+ metadata_dataset = load_dataset(
694
+ metadata_dataset_name,
695
+ dataset_dict["config"],
696
+ split=dataset_dict["split"],
697
+ streaming=streaming,
698
+ **kwargs,
699
+ )
700
+
701
+ # TODO(YL): I forgot to create unique ids for MLS english.
702
+ # To iterate faster, I bypass the original id check and do another one. Done once, assuming it won't change next time.
703
+ # if dataset_dict["name"] == "parler-tts/mls_eng_10k":
704
+ # def concat_ids(book_id, speaker_id, begin_time):
705
+ # return {"id": f"{book_id}_{speaker_id}_{str(begin_time).replace('.', '_')}"}
706
+ # dataset = dataset.map(concat_ids, input_columns=["book_id", "speaker_id", "begin_time"], num_proc=24)
707
+ # metadata_dataset = metadata_dataset.map(concat_ids, input_columns=["book_id", "speaker_id", "begin_time"], num_proc=24)
708
+ # metadata_dataset = metadata_dataset.rename_column(id_column_name, f"metadata_{id_column_name}")
709
+
710
+ if dataset_dict["name"] != "parler-tts/mls_eng_10k":
711
+ if id_column_name is not None and id_column_name not in dataset.column_names:
712
+ raise ValueError(
713
+ f"id_column_name={id_column_name} but has not been found in the dataset columns"
714
+ f"- one of {', '.join(list(dataset.column_names))}."
715
+ )
716
+ if id_column_name is not None and id_column_name not in metadata_dataset.column_names:
717
+ raise ValueError(
718
+ f"id_column_name={id_column_name} but has not been found in the metadata dataset columns"
719
+ f"- one of {', '.join(list(metadata_dataset.column_names))}."
720
+ )
721
+ elif id_column_name is not None:
722
+ metadata_dataset = metadata_dataset.rename_column(id_column_name, f"metadata_{id_column_name}")
723
+
724
+ metadata_columns_to_remove = set(metadata_dataset.column_names).intersection(set(dataset.column_names))
725
+
726
+ if prompt_column_name is not None:
727
+ # We might have applied some transformations to the prompts (e.g. punctuation restoration),
728
+ # so we make sure to remove the column from the original dataset
729
+ if prompt_column_name in dataset.column_names:
730
+ logger.info(
731
+ f"Removing {prompt_column_name} from dataset {dataset_dict['name']} - {dataset_dict['split']}"
732
+ )
733
+ dataset = dataset.remove_columns(prompt_column_name)  # remove_columns returns a new dataset
734
+
735
+ metadata_columns_to_remove = set(metadata_dataset.column_names).intersection(set(dataset.column_names))
736
+ metadata_dataset = metadata_dataset.remove_columns(metadata_columns_to_remove)
737
+
738
+ dataset = concatenate_datasets([dataset, metadata_dataset], axis=1)
739
+
740
+ if id_column_name is not None and dataset_dict["name"] != "parler-tts/mls_eng_10k":
741
+ if (
742
+ len(
743
+ dataset.filter(
744
+ lambda id1, id2: id1 != id2,
745
+ input_columns=[id_column_name, f"metadata_{id_column_name}"],
746
+ )
747
+ )
748
+ != 0
749
+ ):
750
+ raise ValueError(
751
+ f"Concatenate didn't work. Some ids don't correspond on dataset {dataset_dict['name']}"
752
+ )
753
+
754
+ dataset_features = dataset.features.keys()
755
+
756
+ if columns_to_keep is not None:
757
+ dataset = dataset.remove_columns(set(dataset_features - columns_to_keep))
758
+ all_datasets.append(dataset)
759
+
760
+ if len(all_datasets) == 1:
761
+ # we have a single dataset so just return it as is
762
+ return all_datasets[0]
763
+
764
+ if streaming:
765
+ interleaved_dataset = interleave_datasets(
766
+ all_datasets,
767
+ stopping_strategy=stopping_strategy,
768
+ probabilities=probabilities,
769
+ seed=seed,
770
+ )
771
+ else:
772
+ with accelerator.main_process_first():
773
+ interleaved_dataset = concatenate_datasets(all_datasets)
774
+
775
+ return interleaved_dataset
776
+
777
+
778
+ def main():
779
+ # See all possible arguments in src/transformers/training_args.py
780
+ # or by passing the --help flag to this script.
781
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
782
+
783
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ParlerTTSTrainingArguments))
784
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
785
+ # If we pass only one argument to the script and it's the path to a json file,
786
+ # let's parse it to get our arguments.
787
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
788
+ else:
789
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
790
+
791
+ # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
792
+ # information sent is the one passed as arguments along with your Python/PyTorch versions.
793
+ send_example_telemetry("run_parler_tts", model_args, data_args)
794
+
795
+ if training_args.dtype == "float16":
796
+ mixed_precision = "fp16"
797
+ elif training_args.dtype == "bfloat16":
798
+ mixed_precision = "bf16"
799
+ else:
800
+ mixed_precision = "no"
801
+
802
+ if data_args.pad_to_max_length and (
803
+ data_args.max_duration_in_seconds is None
804
+ or data_args.max_prompt_token_length is None
805
+ or data_args.max_description_token_length is None
806
+ ):
807
+ raise ValueError(
808
+ "`pad_to_max_length` is `True` but one of the following parameters has not been set: `max_duration_in_seconds`, `max_prompt_token_length`, `max_description_token_length`"
809
+ )
810
+
811
+ padding = "max_length" if data_args.pad_to_max_length else "longest"
812
+
813
+ ####### A. Preparation
814
+ kwargs_handlers = [InitProcessGroupKwargs(timeout=timedelta(minutes=60))]
815
+ if training_args.torch_compile:
816
+ # TODO(YL): add more compile modes?
817
+ kwargs_handlers.append(TorchDynamoPlugin(backend="inductor", mode="default")) # reduce-overhead
818
+
819
+ accelerator = Accelerator(
820
+ gradient_accumulation_steps=training_args.gradient_accumulation_steps,
821
+ mixed_precision=mixed_precision,
822
+ log_with=training_args.report_to,
823
+ project_dir=training_args.output_dir,
824
+ kwargs_handlers=kwargs_handlers,
825
+ )
826
+
827
+ accelerator.init_trackers(
828
+ project_name=data_args.wandb_project,
829
+ config={
830
+ "learning_rate": training_args.learning_rate,
831
+ "model_name_or_path": model_args.model_name_or_path,
832
+ "num_train_epochs": training_args.num_train_epochs,
833
+ "gradient_accumulation_steps": training_args.gradient_accumulation_steps,
834
+ "per_device_train_batch_size": training_args.per_device_train_batch_size,
835
+ "global_batch_size": training_args.per_device_train_batch_size * accelerator.num_processes,
836
+ "mixed_precision": mixed_precision,
837
+ "lr_scheduler_type": training_args.lr_scheduler_type,
838
+ "warmup_steps": training_args.warmup_steps,
839
+ "freeze_text_encoder": model_args.freeze_text_encoder,
840
+ "max_duration_in_seconds": data_args.max_duration_in_seconds,
841
+ "weight_decay": training_args.weight_decay,
842
+ "adam_beta1": training_args.adam_beta1,
843
+ "adam_beta2": training_args.adam_beta2,
844
+ "temperature": model_args.temperature,
845
+ },
846
+ )
847
+
848
+ # Detecting last checkpoint and eventually continue from last checkpoint
849
+ last_checkpoint = None
850
+ if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
851
+ last_checkpoint = get_last_checkpoint(training_args.output_dir)
852
+ if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
853
+ raise ValueError(
854
+ f"Output directory ({training_args.output_dir}) already exists and is not empty. "
855
+ "Use --overwrite_output_dir to overcome."
856
+ )
857
+ elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
858
+ logger.info(
859
+ f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
860
+ "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
861
+ )
862
+
863
+ # Setup logging
864
+ logging.basicConfig(
865
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
866
+ datefmt="%m/%d/%Y %H:%M:%S",
867
+ handlers=[logging.StreamHandler(sys.stdout)],
868
+ )
869
+ logger.setLevel(logging.INFO if accelerator.is_main_process else logging.WARN)
870
+
871
+ # Log a small summary on each process
872
+ logger.warning(
873
+ f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
874
+ f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
875
+ )
876
+
877
+ # Set the verbosity to info of the Transformers logger (on main process only)
878
+ if accelerator.is_local_main_process:
879
+ datasets.utils.logging.set_verbosity_warning()
880
+ transformers.utils.logging.set_verbosity_info()
881
+ else:
882
+ datasets.utils.logging.set_verbosity_error()
883
+ transformers.utils.logging.set_verbosity_error()
884
+
885
+ logger.info("Training/evaluation parameters %s", training_args)
886
+
887
+ # Set seed before initializing model.
888
+ set_seed(training_args.seed)
889
+ num_workers = data_args.preprocessing_num_workers
890
+
891
+ # 1. First, let's instantiate the feature extractor, tokenizers and model
892
+ # Note for distributed training, the .from_pretrained methods guarantee that only
893
+ # one local process can concurrently download model & vocab.
894
+
895
+ # load feature extractor
896
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
897
+ model_args.feature_extractor_name or model_args.model_name_or_path,
898
+ cache_dir=model_args.cache_dir,
899
+ token=data_args.token,
900
+ trust_remote_code=data_args.trust_remote_code,
901
+ )
902
+ sampling_rate = feature_extractor.sampling_rate
903
+
904
+ # load prompt tokenizer
905
+ prompt_tokenizer = AutoTokenizer.from_pretrained(
906
+ model_args.prompt_tokenizer_name or model_args.description_tokenizer_name or model_args.model_name_or_path,
907
+ cache_dir=model_args.cache_dir,
908
+ token=data_args.token,
909
+ trust_remote_code=data_args.trust_remote_code,
910
+ use_fast=model_args.use_fast_tokenizer,
911
+ padding_side="left", # prompt has to be padded on the left because it's prepended to the codebook hidden states
912
+ )
913
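+ # NOTE (added for illustration, not in the original script): left padding keeps the
+ # real prompt tokens flush against the codebook hidden states they are prepended to.
+ # With pad id 0 and prompts [[5, 6, 7], [8, 9]]:
+ # right padding -> [[5, 6, 7], [8, 9, 0]] (pad sits between prompt and codebooks)
+ # left padding -> [[5, 6, 7], [0, 8, 9]] (prompt ends right where codebooks begin)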
+
914
+ # load description tokenizer
915
+ description_tokenizer = AutoTokenizer.from_pretrained(
916
+ model_args.description_tokenizer_name or model_args.model_name_or_path,
917
+ cache_dir=model_args.cache_dir,
918
+ token=data_args.token,
919
+ trust_remote_code=data_args.trust_remote_code,
920
+ use_fast=model_args.use_fast_tokenizer,
921
+ )
922
+
923
+ if model_args.use_fast_tokenizer:
924
+ logger.warning(
925
+ "Disabling fast tokenizer warning: https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3231-L3235"
926
+ )
927
+ prompt_tokenizer.deprecation_warnings["Asking-to-pad-a-fast-tokenizer"] = True
928
+ description_tokenizer.deprecation_warnings["Asking-to-pad-a-fast-tokenizer"] = True
929
+
930
+ # 2. Now, let's load the dataset
931
+
932
+ if data_args.save_to_disk is not None:
933
+ os.makedirs(data_args.save_to_disk, exist_ok=True)
934
+
935
+ # assume that the dataset has been saved to `save_to_disk` if the latter is not empty
936
+ dataset_was_precomputed = len(os.listdir(data_args.save_to_disk)) > 0
937
+ if dataset_was_precomputed:
938
+ vectorized_datasets = datasets.load_from_disk(data_args.save_to_disk)
939
+ else:
940
+ raw_datasets = DatasetDict()
941
+
942
+ columns_to_keep = {
943
+ "target_audio_column_name": data_args.target_audio_column_name,
944
+ "prompt_column_name": data_args.prompt_column_name,
945
+ }
946
+ if data_args.description_column_name is not None:
947
+ columns_to_keep["description_column_name"] = data_args.description_column_name
948
+
949
+ if training_args.do_train:
950
+ raw_datasets["train"] = load_multiple_datasets(
951
+ accelerator,
952
+ data_args.train_dataset_name,
953
+ data_args.train_dataset_config_name,
954
+ metadata_dataset_names=data_args.train_metadata_dataset_name,
955
+ splits=data_args.train_split_name,
956
+ dataset_samples=data_args.train_dataset_samples,
957
+ seed=training_args.seed,
958
+ cache_dir=model_args.cache_dir,
959
+ num_proc=data_args.preprocessing_num_workers,
960
+ id_column_name=data_args.id_column_name,
961
+ columns_to_keep=columns_to_keep.values(),
962
+ prompt_column_name=data_args.prompt_column_name,
963
+ audio_column_name=data_args.target_audio_column_name,
964
+ sampling_rate=sampling_rate,
965
+ # streaming=data_args.streaming, TODO(SG): optionally enable streaming mode
966
+ )
967
+
968
+ for key in columns_to_keep:
969
+ if columns_to_keep[key] not in raw_datasets["train"].column_names:
970
+ raise ValueError(
971
+ f"--{key} '{columns_to_keep[key]}' not found in dataset '{data_args.train_dataset_name}'."
972
+ f" Make sure to set `--{key}` to the correct column name - one of"
973
+ f" {', '.join(raw_datasets['train'].column_names)}."
974
+ )
975
+
976
+ if data_args.max_train_samples is not None:
977
+ raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
978
+
979
+ if training_args.do_eval:
980
+ raw_datasets["eval"] = load_multiple_datasets(
981
+ accelerator,
982
+ data_args.eval_dataset_name if data_args.eval_dataset_name else data_args.train_dataset_name,
983
+ data_args.eval_dataset_config_name
984
+ if data_args.eval_dataset_config_name
985
+ else data_args.train_dataset_config_name,
986
+ metadata_dataset_names=data_args.eval_metadata_dataset_name,
987
+ splits=data_args.eval_split_name,
988
+ cache_dir=model_args.cache_dir,
989
+ num_proc=data_args.preprocessing_num_workers,
990
+ id_column_name=data_args.id_column_name,
991
+ columns_to_keep=columns_to_keep.values(),
992
+ prompt_column_name=data_args.prompt_column_name,
993
+ audio_column_name=data_args.target_audio_column_name,
994
+ sampling_rate=sampling_rate,
995
+ # streaming=data_args.streaming, TODO(SG): optionally enable streaming mode
996
+ )
997
+
998
+ if data_args.max_eval_samples is not None:
999
+ raw_datasets["eval"] = (
1000
+ raw_datasets["eval"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
1001
+ )
1002
+
1003
+ # 3. Next, let's load the config.
1004
+ config = ParlerTTSConfig.from_pretrained(
1005
+ model_args.model_name_or_path,
1006
+ cache_dir=model_args.cache_dir,
1007
+ token=data_args.token,
1008
+ trust_remote_code=data_args.trust_remote_code,
1009
+ )
1010
+
1011
+ # update pad token id and decoder_start_token_id
1012
+ config.update(
1013
+ {
1014
+ "pad_token_id": model_args.pad_token_id if model_args.pad_token_id is not None else config.pad_token_id,
1015
+ "decoder_start_token_id": (
1016
+ model_args.decoder_start_token_id
1017
+ if model_args.decoder_start_token_id is not None
1018
+ else config.decoder_start_token_id
1019
+ ),
1020
+ }
1021
+ )
1022
+
1023
+ # create model
1024
+ model = ParlerTTSForConditionalGeneration.from_pretrained(
1025
+ model_args.model_name_or_path,
1026
+ cache_dir=model_args.cache_dir,
1027
+ config=config,
1028
+ token=data_args.token,
1029
+ trust_remote_code=data_args.trust_remote_code,
1030
+ )
1031
+
1032
+ # enable gradient checkpointing if necessary
1033
+ if training_args.gradient_checkpointing:
1034
+ model.gradient_checkpointing_enable()
1035
+
1036
+ # 4. Now we preprocess the datasets including loading the audio, resampling and normalization
1037
+ # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
1038
+ # so that we just need to set the correct target sampling rate and normalize the input
1039
+ # via the `feature_extractor`
1040
+
1041
+ # derive max & min input length for sample rate & max duration
1042
+ sampling_rate = feature_extractor.sampling_rate
1043
+ max_target_length = data_args.max_duration_in_seconds * sampling_rate
1044
+ min_target_length = data_args.min_duration_in_seconds * sampling_rate
1045
+ target_audio_column_name = data_args.target_audio_column_name
1046
+ description_column_name = data_args.description_column_name
1047
+ prompt_column_name = data_args.prompt_column_name
1048
+ feature_extractor_input_name = feature_extractor.model_input_names[0]
1049
+ audio_encoder_pad_token_id = config.decoder.pad_token_id
1050
+ audio_encoder_eos_token_id = config.decoder.eos_token_id
1051
+ audio_encoder_bos_token_id = model.generation_config.decoder_start_token_id
1052
+ max_length = model.generation_config.max_length
1053
+ num_codebooks = model.decoder.config.num_codebooks
1054
+ bandwidth = model_args.bandwidth
1055
+
1056
+ # Freeze Encoders
1057
+ model.freeze_encoders(model_args.freeze_text_encoder)
1058
+
1059
+ # Test all gather - used for warm-up and avoiding timeout
1060
+ test_tensor = torch.tensor([accelerator.process_index], device=accelerator.device)
1061
+ gathered_tensor = accelerator.gather(test_tensor)
1062
+ print("gathered_tensor", gathered_tensor)
1063
+ accelerator.wait_for_everyone()
1064
+
1065
+ if not dataset_was_precomputed:
1066
+ # Filter on text length
1067
+ if description_column_name is not None and data_args.max_text_length is not None:
1068
+ with accelerator.main_process_first():
1069
+ # keep only descriptions shorter than max_text_length
1070
+ raw_datasets = raw_datasets.filter(
1071
+ lambda x: len(x) < data_args.max_text_length,
1072
+ num_proc=num_workers,
1073
+ input_columns=[description_column_name],
1074
+ )
1075
+
1076
+ # Preprocessing the dataset.
1077
+ # We need to tokenize the texts.
1078
+ def pass_through_processors(description, prompt):
1079
+ batch = {}
1080
+
1081
+ batch["input_ids"] = description_tokenizer(description.strip())["input_ids"]
1082
+ batch["prompt_input_ids"] = prompt_tokenizer(prompt.strip())["input_ids"]
1083
+
1084
+ return batch
1085
+
1086
+ with accelerator.main_process_first():
1087
+ # this is a trick to avoid rewriting the entire audio column, which takes ages
1088
+ vectorized_datasets = raw_datasets.map(
1089
+ pass_through_processors,
1090
+ remove_columns=next(iter(raw_datasets.values())).column_names,
1091
+ input_columns=[description_column_name, prompt_column_name],
1092
+ num_proc=num_workers,
1093
+ desc="preprocess datasets",
1094
+ )
1095
+
1096
+ # We use Accelerate to perform distributed inference
1097
+ # T5 doesn't support fp16
1098
+ autocast_kwargs = AutocastKwargs(enabled=(mixed_precision != "fp16"))
1099
+
1100
+ # Now we encode the audio labels with encodec.
1101
+ ####### B. Encode audio
1102
+
1103
+ logger.info("*** Encode target audio with encodec ***")
1104
+
1105
+ # no need to prepare audio_decoder because it's only used for inference, without mixed precision
1106
+ # see: https://huggingface.co/docs/accelerate/main/en/package_reference/accelerator#accelerate.Accelerator.prepare
1107
+ if training_args.torch_compile:
1108
+ audio_decoder = accelerator.prepare_model(model.audio_encoder, evaluation_mode=True)
1109
+ else:
1110
+ audio_decoder = model.audio_encoder
1111
+
1112
+ encoder_data_collator = DataCollatorEncodecWithPadding(
1113
+ feature_extractor,
1114
+ audio_column_name=target_audio_column_name,
1115
+ feature_extractor_input_name=feature_extractor_input_name,
1116
+ max_length=max_target_length,
1117
+ padding=padding,
1118
+ )
1119
+
1120
+ def apply_audio_decoder(batch):
1121
+ len_audio = batch.pop("len_audio")
1122
+ audio_decoder.to(batch["input_values"].device).eval()
1123
+ with torch.no_grad():
1124
+ labels = audio_decoder.encode(**batch, bandwidth=bandwidth)["audio_codes"]
1125
+ output = {}
1126
+ output["len_audio"] = len_audio
1127
+ # (1, bsz, codebooks, seq_len) -> (bsz, seq_len, codebooks)
1128
+ output["labels"] = labels.squeeze(0).transpose(1, 2)
1129
+ output["ratio"] = torch.ones_like(len_audio) * labels.shape[-1] / len_audio.max()
1130
+ return output
1131
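+ # NOTE (added for illustration, assumed shapes matching the comment above): for a batch
+ # of 4 clips encoded into 8 codebooks over 150 frames, encode() returns labels of shape
+ # (1, 4, 8, 150); squeeze(0).transpose(1, 2) yields (4, 150, 8), i.e. one row of codebook
+ # ids per frame, and "ratio" records frames per audio sample for the trimming step below.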
+
1132
+ for split in vectorized_datasets:
1133
+ data_loader = DataLoader(
1134
+ raw_datasets[split],
1135
+ batch_size=training_args.audio_encoder_per_device_batch_size,
1136
+ collate_fn=encoder_data_collator,
1137
+ num_workers=training_args.dataloader_num_workers,
1138
+ pin_memory=True,
1139
+ )
1140
+ data_loader = accelerator.prepare(data_loader)
1141
+
1142
+ all_generated_labels = []
1143
+ all_lens = []
1144
+ for batch in tqdm(data_loader, disable=not accelerator.is_local_main_process):
1145
+ generate_labels = apply_audio_decoder(batch)
1146
+ generate_labels = accelerator.pad_across_processes(generate_labels, dim=1, pad_index=0)
1147
+ generate_labels = accelerator.gather_for_metrics(generate_labels)
1148
+
1149
+ if accelerator.is_main_process:
1150
+ lab = generate_labels["labels"].cpu().transpose(1, 2).to(torch.int16)
1151
+ rat = generate_labels["ratio"].cpu().squeeze()
1152
+ lens = generate_labels["len_audio"].cpu().squeeze()
1153
+ lab = [l[:, : int(ratio * length)] for (l, ratio, length) in zip(lab, rat, lens)]
1154
+
1155
+ all_generated_labels.extend(lab)
1156
+ all_lens.extend(lens)
1157
+
1158
+ # (1, codebooks, seq_len) where seq_len=1
1159
+ bos_labels = torch.ones((1, num_codebooks, 1)) * audio_encoder_bos_token_id
1160
+
1161
+ if accelerator.is_main_process:
1162
+ tmp_labels = Dataset.from_dict({"labels": all_generated_labels, "target_length": all_lens})
1163
+ tmp_labels.save_to_disk(
1164
+ os.path.join(data_args.temporary_save_to_disk, split),
1165
+ num_proc=1 if split == "eval" else data_args.preprocessing_num_workers,
1166
+ )
1167
+ accelerator.wait_for_everyone()
1168
+ del all_generated_labels
1169
+
1170
+ tmp_labels = datasets.load_from_disk(os.path.join(data_args.temporary_save_to_disk, split))
1171
+ with accelerator.main_process_first():
1172
+ vectorized_datasets[split] = concatenate_datasets([vectorized_datasets[split], tmp_labels], axis=1)
1173
+
1174
+ def postprocess_dataset(labels):
1175
+ # (1, codebooks, seq_len)
1176
+ labels = torch.tensor(labels).unsqueeze(0)
1177
+ # add bos
1178
+ labels = torch.cat([bos_labels, labels], dim=-1)
1179
+
1180
+ labels, delay_pattern_mask = build_delay_pattern_mask(
1181
+ labels,
1182
+ bos_token_id=audio_encoder_bos_token_id,
1183
+ pad_token_id=audio_encoder_eos_token_id,
1184
+ max_length=labels.shape[-1] + num_codebooks,
1185
+ num_codebooks=num_codebooks,
1186
+ )
1187
+
1188
+ # the first ids of the delay pattern mask are precisely the labels; we use the rest of the mask
1189
+ # to take care of EOS
1190
+ # we want labels to look like this:
1191
+ # - [B, a, b, E, E, E, E]
1192
+ # - [B, B, c, d, E, E, E]
1193
+ # - [B, B, B, e, f, E, E]
1194
+ # - [B, B, B, B, g, h, E]
1195
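+ # (in the diagram above, B is the BOS id, E the EOS id; each row is one codebook,
+ # shifted right by one extra step per codebook - this is the delay pattern)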
+ labels = torch.where(delay_pattern_mask == -1, audio_encoder_eos_token_id, delay_pattern_mask)
1196
+
1197
+ # the first timestep is associated with a row full of BOS, let's get rid of it
1198
+ # we also remove the last timesteps (full of PAD)
1199
+ output = {"labels": labels[:, 1:]}
1200
+ return output
1201
+
1202
+ with accelerator.main_process_first():
1203
+ vectorized_datasets[split] = vectorized_datasets[split].map(
1204
+ postprocess_dataset,
1205
+ num_proc=data_args.preprocessing_num_workers, # this one is resource-consuming with many processes.
1206
+ input_columns=["labels"],
1207
+ desc="Postprocessing labeling",
1208
+ )
1209
+
1210
+ accelerator.free_memory()
1211
+ del generate_labels, all_lens
1212
+
1213
+ with accelerator.main_process_first():
1214
+ # NOTE: filtering is done at the end because in the `datasets` library, caching audio files is done after most operations
1215
+ # caching audio files is time and disk-space consuming, so we want to avoid it at all costs, especially for large (>1Kh) audio datasets.
1216
+ # That's also why we avoid concatenating the processed datasets (vectorized_datasets) with the audio column present in raw_datasets.
1217
+
1218
+ def is_audio_in_length_range(length):
1219
+ return length > min_target_length and length < max_target_length
1220
+
1221
+ # keep only audio whose length lies between min_target_length and max_target_length
1222
+ vectorized_datasets = vectorized_datasets.filter(
1223
+ is_audio_in_length_range,
1224
+ num_proc=num_workers,
1225
+ input_columns=["target_length"],
1226
+ )
1227
+
1228
+ if description_column_name is not None and data_args.max_description_token_length is not None:
1229
+ with accelerator.main_process_first():
1230
+ # keep only descriptions shorter than max_description_token_length
1231
+ vectorized_datasets = vectorized_datasets.filter(
1232
+ lambda x: len(x) < data_args.max_description_token_length,
1233
+ num_proc=num_workers,
1234
+ input_columns=["input_ids"],
1235
+ )
1236
+
1237
+ if data_args.max_prompt_token_length is not None:
1238
+ with accelerator.main_process_first():
1239
+ # keep only prompts shorter than max_prompt_token_length
1240
+ vectorized_datasets = vectorized_datasets.filter(
1241
+ lambda x: len(x) < data_args.max_prompt_token_length,
1242
+ num_proc=num_workers,
1243
+ input_columns=["prompt_input_ids"],
1244
+ )
1245
+
1246
+ if data_args.save_to_disk is not None and not dataset_was_precomputed:
1247
+ if accelerator.is_main_process:
1248
+ vectorized_datasets.save_to_disk(
1249
+ data_args.save_to_disk,
1250
+ num_proc=min(data_args.preprocessing_num_workers, len(vectorized_datasets["eval"]) - 1),
1251
+ )
1252
+ logger.info(f"Dataset saved at {data_args.save_to_disk}")
1253
+
1254
+ audio_max_length = None
1255
+ if training_args.torch_compile:
1256
+ audio_max_length = max(vectorized_datasets["train"]["target_length"])
1257
+ with accelerator.main_process_first():
1258
+ max_sample = vectorized_datasets["train"].filter(
1259
+ lambda x: x == audio_max_length,
1260
+ num_proc=num_workers,
1261
+ input_columns=["target_length"],
1262
+ )
1263
+ audio_max_length = torch.tensor(max_sample[0]["labels"]).shape[1]
1264
+
1265
+ # for large datasets it is advised to run the preprocessing on a
1266
+ # single machine first with ``args.preprocessing_only`` since there will most likely
1267
+ # be a timeout when running the script in distributed mode.
1268
+ # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
1269
+ # cached dataset
1270
+ if data_args.preprocessing_only and data_args.save_to_disk is None:
1271
+ raise ValueError(
1272
+ "`preprocessing_only=True` but `save_to_disk` is not set. The latter should indicate where to save the dataset locally."
1273
+ )
1274
+ elif data_args.preprocessing_only:
1275
+ logger.info(f"Data preprocessing finished. Files saved at {data_args.save_to_disk}")
1276
+ return
1277
+
1278
+ # 6. Next, we can prepare the training.
1279
+
1280
+ # Let's use CLAP similarity and WER as our evaluation metrics.
1281
+
1282
+ # Define evaluation metrics during training, *i.e.* CLAP similarity
1283
+ clap = AutoModel.from_pretrained(model_args.clap_model_name_or_path)
1284
+ clap_processor = AutoProcessor.from_pretrained(model_args.clap_model_name_or_path)
1285
+ metric = evaluate.load("wer")
1286
+
1287
+ def clap_similarity(texts, audios, device):
1288
+ clap_inputs = clap_processor(text=texts, audios=audios, padding=True, return_tensors="pt").to(device)
1289
+ clap.to(device)
1290
+ with torch.no_grad():
1291
+ text_features = clap.get_text_features(
1292
+ clap_inputs["input_ids"], attention_mask=clap_inputs.get("attention_mask", None)
1293
+ )
1294
+ audio_features = clap.get_audio_features(clap_inputs["input_features"])
1295
+
1296
+ cosine_sim = torch.nn.functional.cosine_similarity(audio_features, text_features, dim=1, eps=1e-8)
1297
+
1298
+ clap.to("cpu")
1299
+ clap_inputs.to("cpu")
1300
+ return cosine_sim.mean().to("cpu")
1301
+
1302
+ def wer(prompts, audios, device):
1303
+ asr_pipeline = pipeline(model=model_args.asr_model_name_or_path, device=device)
1304
+ transcriptions = asr_pipeline(
1305
+ [{"raw": audio, "sampling_rate": sampling_rate} for audio in audios],
1306
+ batch_size=int(training_args.per_device_eval_batch_size),
1307
+ )
1308
+
1309
+ word_error = 100 * metric.compute(
1310
+ predictions=[t["text"].lower() for t in transcriptions], references=[t.lower() for t in prompts]
1311
+ )
1312
+
1313
+ return word_error, [t["text"] for t in transcriptions]
1314
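+ # NOTE (added for illustration, not in the original script): WER is reported in
+ # percent - if the ASR model gets 2 of 10 reference words wrong across the batch,
+ # metric.compute returns 0.2 and word_error is 20.0.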
+
1315
+ eval_methods = {"clap": clap_similarity, "wer": wer}
1316
+
1317
+ def compute_metrics(audios, descriptions, prompts, device="cpu"):
1318
+ input_ids = descriptions
1319
+ texts = description_tokenizer.batch_decode(input_ids, skip_special_tokens=True)
1320
+ prompts = prompt_tokenizer.batch_decode(prompts, skip_special_tokens=True)
1321
+ audios = [a.cpu().numpy() for a in audios]
1322
+ results = {"clap": eval_methods["clap"](texts, audios, device)}
1323
+ word_error, transcriptions = eval_methods["wer"](prompts, audios, device)
1324
+ results["wer"] = word_error
1325
+
1326
+ return results, texts, prompts, audios, transcriptions
1327
+
1328
+ # Define Training Schedule
1329
+ # Store some constants
1330
+ per_device_train_batch_size = int(training_args.per_device_train_batch_size)
1331
+ train_batch_size = per_device_train_batch_size * accelerator.num_processes
1332
+ gradient_accumulation_steps = int(training_args.gradient_accumulation_steps)
1333
+ per_device_eval_batch_size = int(training_args.per_device_eval_batch_size)
1334
+
1335
+ if training_args.max_steps < 0:
1336
+ num_epochs = int(training_args.num_train_epochs)
1337
+ steps_per_epoch = len(vectorized_datasets["train"]) // (train_batch_size * gradient_accumulation_steps)
1338
+ total_train_steps = steps_per_epoch * num_epochs
1339
+ elif training_args.max_steps > 0:
1340
+ logger.info("max_steps is given, it will override any value given in num_train_epochs")
1341
+ total_train_steps = int(training_args.max_steps)
1342
+ # Setting a very large number of epochs so we go as many times as necessary over the iterator.
1343
+ num_epochs = sys.maxsize
1344
+ steps_per_epoch = total_train_steps
1345
+
1346
+ if training_args.evaluation_strategy == "epoch":
1347
+ eval_steps = steps_per_epoch
1348
+ elif training_args.eval_steps is None:
1349
+ logger.info("eval_steps is not set, evaluating at the end of each epoch")
1350
+ eval_steps = steps_per_epoch
1351
+ else:
1352
+ eval_steps = training_args.eval_steps
1353
+
1354
+ if training_args.save_strategy == "epoch":
1355
+ save_steps = steps_per_epoch
1356
+ elif training_args.save_strategy == "steps":
1357
+ save_steps = training_args.save_steps
1358
+ else:
1359
+ save_steps = sys.maxsize
1360
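+ # NOTE (worked example with made-up numbers, not in the original script): with 10,000
+ # train examples, per_device_train_batch_size=4, 2 processes and
+ # gradient_accumulation_steps=2, train_batch_size = 8 and steps_per_epoch =
+ # 10,000 // (8 * 2) = 625, so 5 epochs give total_train_steps = 3125; the "epoch"
+ # strategies above then evaluate and save every 625 steps.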
+
1361
+ # T5 doesn't support fp16
1362
+ autocast_kwargs = AutocastKwargs(enabled=(mixed_precision != "fp16"))
1363
+
1364
+ # Define optimizer, LR scheduler, collator
1365
+ optimizer = torch.optim.AdamW(
1366
+ params=model.parameters(),
1367
+ lr=training_args.learning_rate,
1368
+ betas=(training_args.adam_beta1, training_args.adam_beta2),
1369
+ eps=training_args.adam_epsilon,
1370
+ weight_decay=training_args.weight_decay,
1371
+ )
1372
+
1373
+ # LR scheduler gets stepped by `num_processes` each time -> account for this in warmup / total steps
1374
+ lr_scheduler = get_scheduler(
1375
+ name=training_args.lr_scheduler_type,
1376
+ optimizer=optimizer,
1377
+ num_warmup_steps=training_args.get_warmup_steps(total_train_steps) * accelerator.num_processes,
1378
+ num_training_steps=total_train_steps * accelerator.num_processes,
1379
+ )
1380
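+ # NOTE (added for illustration, not in the original script): each of the N prepared
+ # processes steps the scheduler once per optimizer step, hence the num_processes
+ # factor above - e.g. 500 warmup steps on 2 GPUs are passed as 1000 scheduler steps,
+ # which still complete after 500 optimizer steps.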
+
1381
+ # Instantiate custom data collator
1382
+ data_collator = DataCollatorParlerTTSWithPadding(
1383
+ prompt_tokenizer=prompt_tokenizer,
1384
+ description_tokenizer=description_tokenizer,
1385
+ pad_to_multiple_of=data_args.pad_to_multiple_of,
1386
+ padding=padding,
1387
+ prompt_max_length=data_args.max_prompt_token_length,
1388
+ description_max_length=data_args.max_description_token_length,
1389
+ audio_max_length=audio_max_length,
1390
+ )
1391
+
1392
+ # Prepare everything with accelerate
1393
+ model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler)
1394
+
1395
+ logger.info("***** Running training *****")
1396
+ logger.info(f" Num examples = {total_train_steps * train_batch_size * gradient_accumulation_steps}")
1397
+ logger.info(" Instantaneous batch size per device =" f" {per_device_train_batch_size}")
1398
+ logger.info(" Gradient accumulation steps =" f" {gradient_accumulation_steps}")
1399
+ logger.info(
1400
+ f" Total train batch size (w. parallel & distributed) = {train_batch_size * gradient_accumulation_steps}"
1401
+ )
1402
+ logger.info(f" Total optimization steps = {total_train_steps}")
1403
+
1404
+ # ======================== Training ================================
1405
+ train_time = 0
1406
+ train_start = time.time()
1407
+ steps_trained_progress_bar = tqdm(
1408
+ range(total_train_steps), desc="Train steps ... ", position=0, disable=not accelerator.is_local_main_process
1409
+ )
1410
+ continue_training = True
1411
+ epochs_trained = 0
1412
+ cur_step = 0
1413
+
1414
+ checkpoint = None
1415
+ if training_args.resume_from_checkpoint is not None:
1416
+ checkpoint = training_args.resume_from_checkpoint
1417
+ elif last_checkpoint is not None:
1418
+ checkpoint = last_checkpoint
1419
+
1420
+ if accelerator.is_main_process:
1421
+ if training_args.push_to_hub:
1422
+ # Retrieve or infer repo_name
1423
+ repo_name = training_args.hub_model_id
1424
+ if repo_name is None:
1425
+ repo_name = Path(training_args.output_dir).absolute().name
1426
+ # Create repo and retrieve repo_id
1427
+ repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
1428
+ # Clone repo locally
1429
+ repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
1430
+
1431
+ with open(os.path.join(training_args.output_dir, ".gitignore"), "w+") as gitignore:
1432
+ if "wandb" not in gitignore:
1433
+ gitignore.write("wandb\n")
1434
+ elif training_args.output_dir is not None:
1435
+ os.makedirs(training_args.output_dir, exist_ok=True)
1436
+ accelerator.wait_for_everyone()
1437
+
1438
+ # Now save everything to be able to create a single processor later
1439
+ # make sure all processes wait until data is saved
1440
+ with accelerator.main_process_first():
1441
+ # only the main process saves them
1442
+ if accelerator.is_main_process:
1443
+ # save feature extractor, tokenizer and config
1444
+ if (
1445
+ model_args.prompt_tokenizer_name is None
1446
+ and model_args.description_tokenizer_name
1447
+ or (model_args.prompt_tokenizer_name == model_args.description_tokenizer_name)
1448
+ ):
1449
+ prompt_tokenizer.save_pretrained(training_args.output_dir)
1450
+ else:
1451
+ logger.warning(
1452
+ f"Prompt tokenizer ('{model_args.prompt_tokenizer_name}') and description tokenizer ('{model_args.description_tokenizer_name}') are not the same. Saving only the prompt tokenizer."
1453
+ )
1454
+ prompt_tokenizer.save_pretrained(training_args.output_dir)
1455
+
1456
+ feature_extractor.save_pretrained(training_args.output_dir)
1457
+ config.save_pretrained(training_args.output_dir)
1458
+
1459
+ if checkpoint is not None:
1460
+ accelerator.load_state(checkpoint)
1461
+ # Find num steps and epoch from saved state string pattern
1462
+ pattern = r"checkpoint-(\d+)-epoch-(\d+)"
1463
+ match = re.search(pattern, checkpoint)
1464
+ cur_step = int(match.group(1))
1465
+ epochs_trained = int(match.group(2))
1466
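+ # NOTE (added for illustration; the directory name is made up): a checkpoint saved
+ # as "checkpoint-500-epoch-2" resumes with cur_step=500 and epochs_trained=2.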
+
1467
+ logger.info(" Continuing training from checkpoint, will skip to saved global_step")
1468
+ logger.info(f" Continuing training from epoch {epochs_trained}")
1469
+ logger.info(f" Continuing training from global step {cur_step}")
1470
+
1471
+ steps_trained_progress_bar.update(cur_step)
1472
+
1473
+ for epoch in range(0, epochs_trained):
1474
+ vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(training_args.seed)
1475
+
1476
+ if training_args.max_steps < 0:
1477
+ # we know exactly the number of steps per epoch, so can skip through the required number of batches
1478
+ resume_step = (cur_step - epochs_trained * steps_per_epoch) * gradient_accumulation_steps
1479
+ else:
1480
+ # Currently we don't know how many steps we've taken in the current epoch
1481
+ # So we just shuffle the dataset one extra time and start from a fresh epoch
1482
+ # This is "good enough" for our purposes but not fully correct
1483
+ resume_step = None
1484
+ vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(training_args.seed)
1485
+ else:
1486
+ resume_step = None
1487
+
1488
+ gen_kwargs = {
1489
+ "do_sample": model_args.do_sample,
1490
+ "temperature": model_args.temperature,
1491
+ "max_length": model_args.max_length,
1492
+ }
1493
+
1494
+ # Define gradient update step fn
1495
+ def train_step(
1496
+ batch,
1497
+ accelerator,
1498
+ autocast_kwargs,
1499
+ ):
1500
+ model.train()
1501
+
1502
+ if mixed_precision == "fp16":
1503
+ # fp16 doesn't work with T5-like models
1504
+ with accelerator.autocast(autocast_handler=autocast_kwargs):
1505
+ if training_args.parallel_mode.value != "distributed":
1506
+ encoder_outputs = model.text_encoder(
1507
+ input_ids=batch.get("input_ids"), attention_mask=batch.get("attention_mask", None)
1508
+ )
1509
+ else:
1510
+ encoder_outputs = model.module.text_encoder(
1511
+ input_ids=batch.get("input_ids"), attention_mask=batch.get("attention_mask", None)
1512
+ )
1513
+ batch["encoder_outputs"] = encoder_outputs
1514
+
1515
+ outputs = model(**batch)
1516
+ # CE (data) loss
1517
+ ce_loss = outputs.loss
1518
+
1519
+ metrics = {"loss": ce_loss}
1520
+ return ce_loss, metrics
1521
+
1522
+ # Define eval fn
1523
+ def eval_step(
1524
+ batch,
1525
+ accelerator,
1526
+ autocast_kwargs,
1527
+ ):
1528
+ eval_model = model if not training_args.torch_compile else model._orig_mod
1529
+ eval_model.eval()
1530
+
1531
+ if mixed_precision == "fp16":
1532
+ # fp16 doesn't work with T5-like models
1533
+ with accelerator.autocast(autocast_handler=autocast_kwargs):
1534
+ with torch.no_grad():
1535
+ if training_args.parallel_mode.value != "distributed" or training_args.torch_compile:
1536
+ encoder_outputs = eval_model.text_encoder(
1537
+ input_ids=batch.get("input_ids"), attention_mask=batch.get("attention_mask", None)
1538
+ )
1539
+ else:
1540
+ encoder_outputs = eval_model.module.text_encoder(
1541
+ input_ids=batch.get("input_ids"), attention_mask=batch.get("attention_mask", None)
1542
+ )
1543
+ batch["encoder_outputs"] = encoder_outputs
1544
+
1545
+ with torch.no_grad():
1546
+ outputs = eval_model(**batch)
1547
+ # CE (data) loss
1548
+ ce_loss = outputs.loss
1549
+ metrics = {"loss": ce_loss}
1550
+ return metrics
1551
+
1552
+ def generate_step(batch):
1553
+ batch.pop("decoder_attention_mask", None)
1554
+ eval_model = accelerator.unwrap_model(model, keep_fp32_wrapper=mixed_precision != "fp16").eval()
1555
+ if training_args.torch_compile:
1556
+ eval_model = model._orig_mod
1557
+
1558
+ output_audios = eval_model.generate(**batch, **gen_kwargs)
1559
+ output_audios = accelerator.pad_across_processes(output_audios, dim=1, pad_index=0)
1560
+ return output_audios
1561
+
1562
+ for epoch in range(epochs_trained, num_epochs):
1563
+ vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(training_args.seed)
1564
+ sampler = None
1565
+ if training_args.group_by_length:
1566
+ sampler = LengthGroupedSampler(train_batch_size, lengths=vectorized_datasets["train"]["target_length"])
1567
+ train_dataloader = DataLoader(
1568
+ vectorized_datasets["train"],
1569
+ collate_fn=data_collator,
1570
+ batch_size=per_device_train_batch_size,
1571
+ sampler=sampler,
1572
+ num_workers=training_args.dataloader_num_workers,
1573
+ pin_memory=training_args.dataloader_pin_memory,
1574
+ )
1575
+ train_dataloader = accelerator.prepare(train_dataloader)
1576
+ if hasattr(train_dataloader, "dataset") and isinstance(train_dataloader.dataset, IterableDataset):
1577
+ train_dataloader.dataset.set_epoch(epoch)
1578
+
1579
+ if resume_step is not None:
1580
+ # Skip the first N batches in the dataloader when resuming from a checkpoint
1581
+ train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
1582
+ resume_step = None
1583
+
1584
+ for batch in train_dataloader:
1585
+ with accelerator.accumulate(model):
1586
+ loss, train_metric = train_step(batch, accelerator, autocast_kwargs)
1587
+ accelerator.backward(loss)
1588
+ if accelerator.sync_gradients:
1589
+ accelerator.clip_grad_norm_(model.parameters(), training_args.max_grad_norm)
1590
+ optimizer.step()
1591
+ lr_scheduler.step()
1592
+ optimizer.zero_grad()
1593
+
1594
+ # Check if the accelerator has performed an optimization step behind the scenes
1595
+ if accelerator.sync_gradients:
1596
+ steps_trained_progress_bar.update(1)
1597
+ cur_step += 1
1598
+
1599
+ if cur_step % training_args.logging_steps == 0:
1600
+ steps_trained_progress_bar.write(
1601
+ f"Step... ({cur_step} / {total_train_steps} | Loss:"
1602
+ f" {train_metric['loss']}, Learning Rate:"
1603
+ f" {lr_scheduler.get_last_lr()[0]})"
1604
+ )
1605
+ log_metric(
1606
+ accelerator,
1607
+ metrics=train_metric,
1608
+ learning_rate=lr_scheduler.get_last_lr()[0],
1609
+ train_time=train_time + time.time() - train_start,
1610
+ step=cur_step,
1611
+ epoch=epoch + (cur_step - epoch * steps_per_epoch) / steps_per_epoch,
1612
+ prefix="train",
1613
+ )
1614
+
1615
+ # save checkpoint and weights after each save_steps and at the end of training
1616
+ if (cur_step % save_steps == 0) or cur_step == total_train_steps:
1617
+ intermediate_dir = os.path.join(training_args.output_dir, f"checkpoint-{cur_step}-epoch-{epoch}")
1618
+ # safe_serialization=False to avoid shared tensors saving issue (TODO(YL): it's a temporary fix)
1619
+ # https://github.com/huggingface/transformers/issues/27293#issuecomment-1872560074
1620
+ accelerator.save_state(output_dir=intermediate_dir, safe_serialization=False)
1621
+ accelerator.wait_for_everyone()
1622
+ if accelerator.is_main_process:
1623
+ rotate_checkpoints(training_args.save_total_limit, output_dir=training_args.output_dir)
1624
+
1625
+ if cur_step == total_train_steps:
1626
+ # un-wrap model for saving
1627
+ unwrapped_model = accelerator.unwrap_model(model)
1628
+ unwrapped_model.save_pretrained(training_args.output_dir)
1629
+
1630
+ if training_args.push_to_hub:
1631
+ repo.push_to_hub(
1632
+ commit_message=f"Saving train state of step {cur_step}",
1633
+ blocking=False,
1634
+ )
1635
+
1636
+ if training_args.do_eval and (cur_step % eval_steps == 0 or cur_step == total_train_steps):
1637
+ train_time += time.time() - train_start
1638
+ # ======================== Evaluating ==============================
1639
+ eval_metrics = []
1640
+ eval_preds = []
1641
+ eval_descriptions = []
1642
+ eval_prompts = []
1643
+ eval_start = time.time()
1644
+
1645
+ # release training input batch
1646
+ batch = release_memory(batch)
1647
+
1648
+ validation_dataloader = DataLoader(
1649
+ vectorized_datasets["eval"],
1650
+ collate_fn=data_collator,
1651
+ batch_size=per_device_eval_batch_size,
1652
+ drop_last=False,
1653
+ num_workers=training_args.dataloader_num_workers,
1654
+ pin_memory=training_args.dataloader_pin_memory,
1655
+ )
1656
+ validation_dataloader = accelerator.prepare(validation_dataloader)
1657
+
1658
+ for batch in tqdm(
1659
+ validation_dataloader,
1660
+ desc="Evaluating - Inference ...",
1661
+ position=2,
1662
+ disable=not accelerator.is_local_main_process,
1663
+ ):
1664
+ # Model forward
1665
+ eval_metric = eval_step(batch, accelerator, autocast_kwargs)
1666
+ eval_metric = accelerator.gather_for_metrics(eval_metric)
1667
+ eval_metrics.append(eval_metric)
1668
+
1669
+ if training_args.predict_with_generate:
1670
+ validation_dataloader = DataLoader(
1671
+ vectorized_datasets["eval"],
1672
+ collate_fn=data_collator,
1673
+ batch_size=per_device_eval_batch_size,
1674
+ drop_last=False,
1675
+ num_workers=training_args.dataloader_num_workers,
1676
+ pin_memory=training_args.dataloader_pin_memory,
1677
+ )
1678
+ validation_dataloader = accelerator.prepare(validation_dataloader)
1679
+ # generation
1680
+ for batch in tqdm(
1681
+ validation_dataloader,
1682
+ desc="Evaluating - Generation ...",
1683
+ position=2,
1684
+ disable=not accelerator.is_local_main_process,
1685
+ ):
1686
+ generated_audios = generate_step(batch)
1687
+ # Gather all predictions and targets
1688
+ generated_audios, input_ids, prompts = accelerator.pad_across_processes(
1689
+ (generated_audios, batch["input_ids"], batch["prompt_input_ids"]), dim=1, pad_index=0
1690
+ )
1691
+ generated_audios, input_ids, prompts = accelerator.gather_for_metrics(
1692
+ (generated_audios, input_ids, prompts)
1693
+ )
1694
+ eval_preds.extend(generated_audios.to("cpu"))
1695
+ eval_descriptions.extend(input_ids.to("cpu"))
1696
+ eval_prompts.extend(prompts.to("cpu"))
1697
+
1698
+ eval_time = time.time() - eval_start
1699
+ # normalize eval metrics
1700
+ eval_metrics = {
1701
+ key: torch.mean(torch.cat([d[key].unsqueeze(0) for d in eval_metrics]))
1702
+ for key in eval_metrics[0]
1703
+ }
1704
+
1705
+ # compute metrics
1706
+ metrics_desc = ""
1707
+ if training_args.predict_with_generate:
1708
+ metric_values, pred_descriptions, pred_prompts, audios, transcriptions = compute_metrics(
1709
+ eval_preds, eval_descriptions, eval_prompts, accelerator.device
1710
+ )
1711
+ eval_metrics.update(metric_values)
1712
+ metrics_desc = " ".join([f"Eval {key}: {value} |" for key, value in metric_values.items()])
1713
+ if "wandb" in training_args.report_to:
1714
+ log_pred(
1715
+ accelerator,
1716
+ pred_descriptions,
1717
+ pred_prompts,
1718
+ transcriptions,
1719
+ audios,
1720
+ sampling_rate=sampling_rate,
1721
+ step=cur_step,
1722
+ prefix="eval",
1723
+ )
1724
+
1725
+ # Print metrics and update progress bar
1726
+ steps_trained_progress_bar.write(
1727
+ f"Eval results for step ({cur_step} / {total_train_steps} | Eval Loss: {eval_metrics['loss']} |"
1728
+ f" {metrics_desc})"
1729
+ )
1730
+
1731
+ log_metric(
1732
+ accelerator,
1733
+ metrics=eval_metrics,
1734
+ train_time=eval_time,
1735
+ step=cur_step,
1736
+ epoch=epoch + (cur_step - epoch * steps_per_epoch) / steps_per_epoch,
1737
+ prefix="eval",
1738
+ )
1739
+
1740
+ # release eval batch and reset metrics
1741
+ eval_metrics = []
1742
+ eval_preds = []
1743
+ eval_descriptions = []
1744
+ eval_prompts = []
1745
+ batch = release_memory(batch)
1746
+
1747
+ # flush the train metrics
1748
+ train_start = time.time()
1749
+
1750
+ # break condition
1751
+ if cur_step == total_train_steps:
1752
+ continue_training = False
1753
+ break
1754
+
1755
+ if not continue_training:
1756
+ break
1757
+
1758
+ accelerator.end_training()
1759
+
1760
+
1761
+ if __name__ == "__main__":
1762
+ set_start_method("spawn")
1763
+ main()
special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,941 @@
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "32000": {
29
+ "content": "<extra_id_99>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "32001": {
37
+ "content": "<extra_id_98>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "32002": {
45
+ "content": "<extra_id_97>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "32003": {
53
+ "content": "<extra_id_96>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "32004": {
61
+ "content": "<extra_id_95>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "32005": {
69
+ "content": "<extra_id_94>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "32006": {
77
+ "content": "<extra_id_93>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "32007": {
85
+ "content": "<extra_id_92>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "32008": {
93
+ "content": "<extra_id_91>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "32009": {
101
+ "content": "<extra_id_90>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "32010": {
109
+ "content": "<extra_id_89>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "32011": {
117
+ "content": "<extra_id_88>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "32012": {
125
+ "content": "<extra_id_87>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "32013": {
133
+ "content": "<extra_id_86>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "32014": {
141
+ "content": "<extra_id_85>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "32015": {
149
+ "content": "<extra_id_84>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "32016": {
157
+ "content": "<extra_id_83>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "32017": {
165
+ "content": "<extra_id_82>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "32018": {
173
+ "content": "<extra_id_81>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "32019": {
181
+ "content": "<extra_id_80>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "32020": {
189
+ "content": "<extra_id_79>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "32021": {
197
+ "content": "<extra_id_78>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "32022": {
205
+ "content": "<extra_id_77>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "32023": {
213
+ "content": "<extra_id_76>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "32024": {
221
+ "content": "<extra_id_75>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "32025": {
229
+ "content": "<extra_id_74>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "32026": {
237
+ "content": "<extra_id_73>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "32027": {
245
+ "content": "<extra_id_72>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "32028": {
253
+ "content": "<extra_id_71>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "32029": {
261
+ "content": "<extra_id_70>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "32030": {
269
+ "content": "<extra_id_69>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "32031": {
277
+ "content": "<extra_id_68>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "32032": {
285
+ "content": "<extra_id_67>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "32033": {
293
+ "content": "<extra_id_66>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "32034": {
301
+ "content": "<extra_id_65>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "32035": {
309
+ "content": "<extra_id_64>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "32036": {
317
+ "content": "<extra_id_63>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "32037": {
325
+ "content": "<extra_id_62>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "32038": {
333
+ "content": "<extra_id_61>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "32039": {
341
+ "content": "<extra_id_60>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "32040": {
349
+ "content": "<extra_id_59>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "32041": {
357
+ "content": "<extra_id_58>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "32042": {
365
+ "content": "<extra_id_57>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "32043": {
373
+ "content": "<extra_id_56>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "32044": {
381
+ "content": "<extra_id_55>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "32045": {
389
+ "content": "<extra_id_54>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "32046": {
397
+ "content": "<extra_id_53>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "32047": {
405
+ "content": "<extra_id_52>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "32048": {
413
+ "content": "<extra_id_51>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "32049": {
421
+ "content": "<extra_id_50>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "32050": {
429
+ "content": "<extra_id_49>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "32051": {
437
+ "content": "<extra_id_48>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "32052": {
445
+ "content": "<extra_id_47>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "32053": {
453
+ "content": "<extra_id_46>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "32054": {
461
+ "content": "<extra_id_45>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "32055": {
469
+ "content": "<extra_id_44>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "32056": {
477
+ "content": "<extra_id_43>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "32057": {
485
+ "content": "<extra_id_42>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "32058": {
493
+ "content": "<extra_id_41>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "32059": {
501
+ "content": "<extra_id_40>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "32060": {
509
+ "content": "<extra_id_39>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "32061": {
517
+ "content": "<extra_id_38>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "32062": {
525
+ "content": "<extra_id_37>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "32063": {
533
+ "content": "<extra_id_36>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "32064": {
541
+ "content": "<extra_id_35>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "32065": {
549
+ "content": "<extra_id_34>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "32066": {
557
+ "content": "<extra_id_33>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "32067": {
565
+ "content": "<extra_id_32>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "32068": {
573
+ "content": "<extra_id_31>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "32069": {
581
+ "content": "<extra_id_30>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "32070": {
589
+ "content": "<extra_id_29>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "32071": {
597
+ "content": "<extra_id_28>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "32072": {
605
+ "content": "<extra_id_27>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "32073": {
613
+ "content": "<extra_id_26>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "32074": {
621
+ "content": "<extra_id_25>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "32075": {
629
+ "content": "<extra_id_24>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "32076": {
637
+ "content": "<extra_id_23>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "32077": {
645
+ "content": "<extra_id_22>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "32078": {
653
+ "content": "<extra_id_21>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "32079": {
661
+ "content": "<extra_id_20>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "32080": {
669
+ "content": "<extra_id_19>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "32081": {
677
+ "content": "<extra_id_18>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "32082": {
685
+ "content": "<extra_id_17>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "32083": {
693
+ "content": "<extra_id_16>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "32084": {
701
+ "content": "<extra_id_15>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "32085": {
709
+ "content": "<extra_id_14>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "32086": {
717
+ "content": "<extra_id_13>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "32087": {
725
+ "content": "<extra_id_12>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "32088": {
733
+ "content": "<extra_id_11>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "32089": {
741
+ "content": "<extra_id_10>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "32090": {
749
+ "content": "<extra_id_9>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "32091": {
757
+ "content": "<extra_id_8>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "32092": {
765
+ "content": "<extra_id_7>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "32093": {
773
+ "content": "<extra_id_6>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "32094": {
781
+ "content": "<extra_id_5>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "32095": {
789
+ "content": "<extra_id_4>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "32096": {
797
+ "content": "<extra_id_3>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "32097": {
805
+ "content": "<extra_id_2>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "32098": {
813
+ "content": "<extra_id_1>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "32099": {
821
+ "content": "<extra_id_0>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ }
828
+ },
829
+ "additional_special_tokens": [
830
+ "<extra_id_0>",
831
+ "<extra_id_1>",
832
+ "<extra_id_2>",
833
+ "<extra_id_3>",
834
+ "<extra_id_4>",
835
+ "<extra_id_5>",
836
+ "<extra_id_6>",
837
+ "<extra_id_7>",
838
+ "<extra_id_8>",
839
+ "<extra_id_9>",
840
+ "<extra_id_10>",
841
+ "<extra_id_11>",
842
+ "<extra_id_12>",
843
+ "<extra_id_13>",
844
+ "<extra_id_14>",
845
+ "<extra_id_15>",
846
+ "<extra_id_16>",
847
+ "<extra_id_17>",
848
+ "<extra_id_18>",
849
+ "<extra_id_19>",
850
+ "<extra_id_20>",
851
+ "<extra_id_21>",
852
+ "<extra_id_22>",
853
+ "<extra_id_23>",
854
+ "<extra_id_24>",
855
+ "<extra_id_25>",
856
+ "<extra_id_26>",
857
+ "<extra_id_27>",
858
+ "<extra_id_28>",
859
+ "<extra_id_29>",
860
+ "<extra_id_30>",
861
+ "<extra_id_31>",
862
+ "<extra_id_32>",
863
+ "<extra_id_33>",
864
+ "<extra_id_34>",
865
+ "<extra_id_35>",
866
+ "<extra_id_36>",
867
+ "<extra_id_37>",
868
+ "<extra_id_38>",
869
+ "<extra_id_39>",
870
+ "<extra_id_40>",
871
+ "<extra_id_41>",
872
+ "<extra_id_42>",
873
+ "<extra_id_43>",
874
+ "<extra_id_44>",
875
+ "<extra_id_45>",
876
+ "<extra_id_46>",
877
+ "<extra_id_47>",
878
+ "<extra_id_48>",
879
+ "<extra_id_49>",
880
+ "<extra_id_50>",
881
+ "<extra_id_51>",
882
+ "<extra_id_52>",
883
+ "<extra_id_53>",
884
+ "<extra_id_54>",
885
+ "<extra_id_55>",
886
+ "<extra_id_56>",
887
+ "<extra_id_57>",
888
+ "<extra_id_58>",
889
+ "<extra_id_59>",
890
+ "<extra_id_60>",
891
+ "<extra_id_61>",
892
+ "<extra_id_62>",
893
+ "<extra_id_63>",
894
+ "<extra_id_64>",
895
+ "<extra_id_65>",
896
+ "<extra_id_66>",
897
+ "<extra_id_67>",
898
+ "<extra_id_68>",
899
+ "<extra_id_69>",
900
+ "<extra_id_70>",
901
+ "<extra_id_71>",
902
+ "<extra_id_72>",
903
+ "<extra_id_73>",
904
+ "<extra_id_74>",
905
+ "<extra_id_75>",
906
+ "<extra_id_76>",
907
+ "<extra_id_77>",
908
+ "<extra_id_78>",
909
+ "<extra_id_79>",
910
+ "<extra_id_80>",
911
+ "<extra_id_81>",
912
+ "<extra_id_82>",
913
+ "<extra_id_83>",
914
+ "<extra_id_84>",
915
+ "<extra_id_85>",
916
+ "<extra_id_86>",
917
+ "<extra_id_87>",
918
+ "<extra_id_88>",
919
+ "<extra_id_89>",
920
+ "<extra_id_90>",
921
+ "<extra_id_91>",
922
+ "<extra_id_92>",
923
+ "<extra_id_93>",
924
+ "<extra_id_94>",
925
+ "<extra_id_95>",
926
+ "<extra_id_96>",
927
+ "<extra_id_97>",
928
+ "<extra_id_98>",
929
+ "<extra_id_99>"
930
+ ],
931
+ "clean_up_tokenization_spaces": true,
932
+ "eos_token": "</s>",
933
+ "extra_ids": 100,
934
+ "legacy": true,
935
+ "model_max_length": 512,
936
+ "pad_token": "<pad>",
937
+ "padding_side": "left",
938
+ "sp_model_kwargs": {},
939
+ "tokenizer_class": "T5Tokenizer",
940
+ "unk_token": "<unk>"
941
+ }
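The block above registers the T5-style sentinel tokens `<extra_id_98>` down to `<extra_id_0>` at ids 32001–32099 as special tokens and sets left padding for the prompt tokenizer. A minimal sketch of loading this configuration, assuming Hub access and the `transformers` pin from the requirements below; the repo id is the one logged in the training arguments:

```python
# Minimal sketch: load the tokenizer that the tokenizer_config.json above describes.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

# The config pads on the left (useful when batching decoder prompts) and maps
# <extra_id_0> to the top of the sentinel range.
assert tokenizer.padding_side == "left"
assert tokenizer.convert_tokens_to_ids("<extra_id_0>") == 32099
```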
wandb/debug-cli.sanchit.log ADDED
File without changes
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
wandb/debug.log ADDED
@@ -0,0 +1,35 @@
+ 2024-05-13 20:57:08,363 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
+ 2024-05-13 20:57:08,363 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Configure stats pid to 1264178
+ 2024-05-13 20:57:08,363 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
+ 2024-05-13 20:57:08,363 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Loading settings from /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/settings
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_parler_tts_training.py', 'program_abspath': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/run_parler_tts_training.py', 'program': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py'}
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_setup.py:_flush():76] Applying login settings: {}
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:_log_setup():520] Logging user logs to /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205708-e3sq5zz5/logs/debug.log
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:_log_setup():521] Logging internal logs to /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205708-e3sq5zz5/logs/debug-internal.log
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:init():560] calling init triggers
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
+ config: {}
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:init():610] starting backend
+ 2024-05-13 20:57:08,364 INFO MainThread:1264178 [wandb_init.py:init():614] setting up manager
+ 2024-05-13 20:57:08,367 INFO MainThread:1264178 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2024-05-13 20:57:08,368 INFO MainThread:1264178 [wandb_init.py:init():622] backend started and connected
+ 2024-05-13 20:57:08,369 INFO MainThread:1264178 [wandb_init.py:init():711] updated telemetry
+ 2024-05-13 20:57:08,373 INFO MainThread:1264178 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
+ 2024-05-13 20:57:08,727 INFO MainThread:1264178 [wandb_run.py:_on_init():2396] communicating current version
+ 2024-05-13 20:57:08,782 INFO MainThread:1264178 [wandb_run.py:_on_init():2405] got version response
+ 2024-05-13 20:57:08,782 INFO MainThread:1264178 [wandb_init.py:init():795] starting run threads in backend
+ 2024-05-13 20:57:11,328 INFO MainThread:1264178 [wandb_run.py:_console_start():2374] atexit reg
+ 2024-05-13 20:57:11,328 INFO MainThread:1264178 [wandb_run.py:_redirect():2229] redirect: wrap_raw
+ 2024-05-13 20:57:11,328 INFO MainThread:1264178 [wandb_run.py:_redirect():2294] Wrapping output streams.
+ 2024-05-13 20:57:11,329 INFO MainThread:1264178 [wandb_run.py:_redirect():2319] Redirects installed.
+ 2024-05-13 20:57:11,329 INFO MainThread:1264178 [wandb_init.py:init():838] run started, returning control to user process
+ 2024-05-13 20:57:11,330 INFO MainThread:1264178 [wandb_run.py:_config_callback():1376] config_cb None None {'learning_rate': 8e-05, 'model_name_or_path': 'parler-tts/parler_tts_mini_v0.1', 'num_train_epochs': 8.0, 'gradient_accumulation_steps': 8, 'per_device_train_batch_size': 16, 'global_batch_size': 16, 'mixed_precision': 'bf16', 'lr_scheduler_type': 'SchedulerType.COSINE', 'warmup_steps': 250, 'freeze_text_encoder': True, 'max_duration_in_seconds': 30.0, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'temperature': 1.0}
+ 2024-05-13 21:45:31,478 INFO MainThread:1264178 [wandb_run.py:_finish():2103] finishing run sanchit-gandhi/parler-speech/e3sq5zz5
+ 2024-05-13 21:45:31,478 INFO MainThread:1264178 [wandb_run.py:_atexit_cleanup():2343] got exitcode: 0
+ 2024-05-13 21:45:31,478 INFO MainThread:1264178 [wandb_run.py:_restore():2326] restore
+ 2024-05-13 21:45:31,478 INFO MainThread:1264178 [wandb_run.py:_restore():2332] restore done
+ 2024-05-13 21:45:45,893 INFO MainThread:1264178 [wandb_run.py:_footer_history_summary_info():3994] rendering history
+ 2024-05-13 21:45:45,894 INFO MainThread:1264178 [wandb_run.py:_footer_history_summary_info():4026] rendering summary
+ 2024-05-13 21:45:45,897 INFO MainThread:1264178 [wandb_run.py:_footer_sync_info():3953] logging synced files
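debug.log above traces a complete wandb run lifecycle: settings are loaded, the backend is spawned, console streams are redirected, hyperparameters are recorded through the config callback, and the run finishes with exit code 0. A minimal sketch of the same lifecycle with the wandb 0.17.0 API pinned in this environment; the project name is an assumption inferred from the run path `sanchit-gandhi/parler-speech/e3sq5zz5`:

```python
# Minimal sketch of the lifecycle traced above (init -> config callback -> finish).
import wandb

run = wandb.init(project="parler-speech")  # "starting backend" / "run started"
run.config.update({"learning_rate": 8e-05, "num_train_epochs": 8.0})  # config_cb
run.finish()  # "finishing run ... got exitcode: 0", then history/summary sync
```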
wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml ADDED
@@ -0,0 +1,248 @@
+ name: venv
+ channels:
+ - defaults
+ dependencies:
+ - _libgcc_mutex=0.1=main
+ - _openmp_mutex=5.1=1_gnu
+ - bzip2=1.0.8=h5eee18b_6
+ - ca-certificates=2024.3.11=h06a4308_0
+ - ld_impl_linux-64=2.38=h1181459_1
+ - libffi=3.4.4=h6a678d5_1
+ - libgcc-ng=11.2.0=h1234567_1
+ - libgomp=11.2.0=h1234567_1
+ - libstdcxx-ng=11.2.0=h1234567_1
+ - libuuid=1.41.5=h5eee18b_0
+ - ncurses=6.4=h6a678d5_0
+ - openssl=3.0.13=h7f8727e_1
+ - pip=24.0=py311h06a4308_0
+ - python=3.11.9=h955ad1f_0
+ - readline=8.2=h5eee18b_0
+ - setuptools=69.5.1=py311h06a4308_0
+ - sqlite=3.45.3=h5eee18b_0
+ - tk=8.6.14=h39e8969_0
+ - wheel=0.43.0=py311h06a4308_0
+ - xz=5.4.6=h5eee18b_1
+ - zlib=1.2.13=h5eee18b_1
+ - pip:
+ - absl-py==2.1.0
+ - accelerate==0.30.0
+ - aiohttp==3.9.5
+ - aiosignal==1.3.1
+ - aniso8601==9.0.1
+ - annotated-types==0.6.0
+ - anyio==4.3.0
+ - argbind==0.3.7
+ - argon2-cffi==23.1.0
+ - argon2-cffi-bindings==21.2.0
+ - arrow==1.3.0
+ - asttokens==2.4.1
+ - async-lru==2.0.4
+ - attrs==23.2.0
+ - audioread==3.0.1
+ - babel==2.15.0
+ - beautifulsoup4==4.12.3
+ - bidict==0.23.1
+ - bitsandbytes==0.43.1
+ - bleach==6.1.0
+ - certifi==2024.2.2
+ - cffi==1.16.0
+ - charset-normalizer==3.3.2
+ - click==8.1.7
+ - coloredlogs==14.0
+ - comm==0.2.2
+ - contourpy==1.2.1
+ - cycler==0.12.1
+ - datasets==2.19.1
+ - debugpy==1.8.1
+ - decorator==5.1.1
+ - defusedxml==0.7.1
+ - descript-audio-codec==1.0.0
+ - descript-audiotools==0.7.2
+ - dill==0.3.8
+ - dnspython==2.3.0
+ - docker-pycreds==0.4.0
+ - docstring-parser==0.16
+ - editdistance==0.8.1
+ - einops==0.8.0
+ - et-xmlfile==1.1.0
+ - evaluate==0.4.2
+ - eventlet==0.36.1
+ - executing==2.0.1
+ - fastjsonschema==2.19.1
+ - ffmpy==0.3.2
+ - filelock==3.14.0
+ - fire==0.6.0
+ - flask==2.2.5
+ - flask-cors==4.0.1
+ - flask-restful==0.3.10
+ - flask-socketio==5.3.6
+ - flask-talisman==1.1.0
+ - flatten-dict==0.4.2
+ - fonttools==4.51.0
+ - fqdn==1.5.1
+ - frozenlist==1.4.1
+ - fsspec==2024.3.1
+ - future==1.0.0
+ - g2p==2.0.0
+ - gitdb==4.0.11
+ - gitpython==3.1.43
+ - greenlet==3.0.3
+ - grpcio==1.63.0
+ - h11==0.14.0
+ - httpcore==1.0.5
+ - httpx==0.27.0
+ - huggingface-hub==0.23.0
+ - humanfriendly==10.0
+ - idna==3.7
+ - importlib-resources==6.4.0
+ - ipdb==0.13.13
+ - ipykernel==6.29.4
+ - ipython==8.24.0
+ - isoduration==20.11.0
+ - itsdangerous==2.2.0
+ - jedi==0.19.1
+ - jinja2==3.1.4
+ - jiwer==3.0.4
+ - joblib==1.4.2
+ - json5==0.9.25
+ - jsonpointer==2.4
+ - jsonschema==4.22.0
+ - jsonschema-specifications==2023.12.1
+ - julius==0.2.7
+ - jupyter-client==8.6.1
+ - jupyter-core==5.7.2
+ - jupyter-events==0.10.0
+ - jupyter-lsp==2.2.5
+ - jupyter-server==2.14.0
+ - jupyter-server-terminals==0.5.3
+ - jupyterlab==4.2.0
+ - jupyterlab-pygments==0.3.0
+ - jupyterlab-server==2.27.1
+ - kiwisolver==1.4.5
+ - lazy-loader==0.4
+ - librosa==0.10.2
+ - llvmlite==0.42.0
+ - markdown==3.6
+ - markdown-it-py==3.0.0
+ - markdown2==2.4.13
+ - markupsafe==2.1.5
+ - matplotlib==3.8.4
+ - matplotlib-inline==0.1.7
+ - mdurl==0.1.2
+ - mistune==3.0.2
+ - mpmath==1.3.0
+ - msgpack==1.0.8
+ - multidict==6.0.5
+ - multiprocess==0.70.16
+ - munkres==1.1.4
+ - nbclient==0.10.0
+ - nbconvert==7.16.4
+ - nbformat==5.10.4
+ - nest-asyncio==1.6.0
+ - networkx==3.3
+ - notebook-shim==0.2.4
+ - numba==0.59.1
+ - numpy==1.26.4
+ - nvidia-cublas-cu12==12.1.3.1
+ - nvidia-cuda-cupti-cu12==12.1.105
+ - nvidia-cuda-nvrtc-cu12==12.1.105
+ - nvidia-cuda-runtime-cu12==12.1.105
+ - nvidia-cudnn-cu12==8.9.2.26
+ - nvidia-cufft-cu12==11.0.2.54
+ - nvidia-curand-cu12==10.3.2.106
+ - nvidia-cusolver-cu12==11.4.5.107
+ - nvidia-cusparse-cu12==12.1.0.106
+ - nvidia-nccl-cu12==2.20.5
+ - nvidia-nvjitlink-cu12==12.4.127
+ - nvidia-nvtx-cu12==12.1.105
+ - openpyxl==3.1.2
+ - overrides==7.7.0
+ - packaging==24.0
+ - pandas==2.2.2
+ - pandocfilters==1.5.1
+ - panphon==0.20.0
+ - parler-tts==0.1
+ - parso==0.8.4
+ - pexpect==4.9.0
+ - pillow==10.3.0
+ - platformdirs==4.2.1
+ - pooch==1.8.1
+ - prometheus-client==0.20.0
+ - prompt-toolkit==3.0.43
+ - protobuf==3.19.6
+ - psutil==5.9.8
+ - ptyprocess==0.7.0
+ - pure-eval==0.2.2
+ - pyarrow==16.0.0
+ - pyarrow-hotfix==0.6
+ - pycparser==2.22
+ - pydantic==2.7.1
+ - pydantic-core==2.18.2
+ - pygments==2.18.0
+ - pyloudnorm==0.1.1
+ - pyparsing==3.1.2
+ - pystoi==0.4.1
+ - python-dateutil==2.9.0.post0
+ - python-engineio==4.9.0
+ - python-json-logger==2.0.7
+ - python-socketio==5.11.2
+ - pytz==2024.1
+ - pyyaml==6.0.1
+ - pyzmq==26.0.3
+ - randomname==0.2.1
+ - rapidfuzz==3.9.0
+ - referencing==0.35.1
+ - regex==2024.4.28
+ - requests==2.31.0
+ - rfc3339-validator==0.1.4
+ - rfc3986-validator==0.1.1
+ - rich==13.7.1
+ - rpds-py==0.18.1
+ - safetensors==0.4.3
+ - scikit-learn==1.4.2
+ - scipy==1.13.0
+ - send2trash==1.8.3
+ - sentencepiece==0.2.0
+ - sentry-sdk==2.1.1
+ - setproctitle==1.3.3
+ - simple-websocket==1.0.0
+ - six==1.16.0
+ - smmap==5.0.1
+ - sniffio==1.3.1
+ - soundfile==0.12.1
+ - soupsieve==2.5
+ - soxr==0.3.7
+ - stack-data==0.6.3
+ - sympy==1.12
+ - tensorboard==2.16.2
+ - tensorboard-data-server==0.7.2
+ - termcolor==2.4.0
+ - terminado==0.18.1
+ - text-unidecode==1.3
+ - threadpoolctl==3.5.0
+ - tinycss2==1.3.0
+ - tokenizers==0.19.1
+ - torch==2.3.0
+ - torch-stoi==0.2.1
+ - torchaudio==2.3.0
+ - tornado==6.4
+ - tqdm==4.66.4
+ - traitlets==5.14.3
+ - transformers==4.41.0.dev0
+ - triton==2.3.0
+ - types-python-dateutil==2.9.0.20240316
+ - typing-extensions==4.11.0
+ - tzdata==2024.1
+ - unicodecsv==0.14.1
+ - uri-template==1.3.0
+ - urllib3==2.2.1
+ - wandb==0.17.0
+ - wcwidth==0.2.13
+ - webcolors==1.13
+ - webencodings==0.5.1
+ - websocket-client==1.8.0
+ - werkzeug==3.0.3
+ - wsproto==1.2.0
+ - xxhash==3.4.1
+ - yarl==1.9.4
+ prefix: /home/sanchit/miniconda3/envs/venv
wandb/run-20240513_204644-y7fy6vtp/files/config.yaml ADDED
@@ -0,0 +1,86 @@
+ wandb_version: 1
+
+ _wandb:
+ desc: null
+ value:
+ python_version: 3.11.9
+ cli_version: 0.17.0
+ framework: huggingface
+ huggingface_version: 4.41.0.dev0
+ is_jupyter_run: false
+ is_kaggle_kernel: false
+ start_time: 1715626004
+ t:
+ 1:
+ - 1
+ - 5
+ - 11
+ - 49
+ - 51
+ - 53
+ - 55
+ - 71
+ - 100
+ 2:
+ - 1
+ - 5
+ - 11
+ - 49
+ - 51
+ - 53
+ - 55
+ - 71
+ - 100
+ 3:
+ - 23
+ 4: 3.11.9
+ 5: 0.17.0
+ 6: 4.41.0.dev0
+ 8:
+ - 5
+ 13: linux-x86_64
+ learning_rate:
+ desc: null
+ value: 8.0e-05
+ model_name_or_path:
+ desc: null
+ value: parler-tts/parler_tts_mini_v0.1
+ num_train_epochs:
+ desc: null
+ value: 8.0
+ gradient_accumulation_steps:
+ desc: null
+ value: 8
+ per_device_train_batch_size:
+ desc: null
+ value: 16
+ global_batch_size:
+ desc: null
+ value: 16
+ mixed_precision:
+ desc: null
+ value: bf16
+ lr_scheduler_type:
+ desc: null
+ value: SchedulerType.COSINE
+ warmup_steps:
+ desc: null
+ value: 250
+ freeze_text_encoder:
+ desc: null
+ value: true
+ max_duration_in_seconds:
+ desc: null
+ value: 30.0
+ weight_decay:
+ desc: null
+ value: 0.01
+ adam_beta1:
+ desc: null
+ value: 0.9
+ adam_beta2:
+ desc: null
+ value: 0.99
+ temperature:
+ desc: null
+ value: 1.0
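The config above records the effective hyperparameters: AdamW betas of (0.9, 0.99), a cosine schedule with 250 warmup steps, and a peak learning rate of 8e-05. A minimal sketch of wiring these together with standard `torch`/`transformers` calls; the model and the total step count are placeholders, not values from this run:

```python
# Minimal sketch of the optimizer/scheduler pairing described in config.yaml.
import torch
from transformers import get_cosine_schedule_with_warmup

model = torch.nn.Linear(8, 8)  # placeholder standing in for the real model
optimizer = torch.optim.AdamW(
    model.parameters(), lr=8e-05, betas=(0.9, 0.99), weight_decay=0.01
)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=250,      # warmup_steps from the config
    num_training_steps=1_000,  # placeholder: in practice, epochs * steps per epoch
)
```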
wandb/run-20240513_204644-y7fy6vtp/files/output.log ADDED
@@ -0,0 +1,187 @@
+ 05/13/2024 20:46:47 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
+ 05/13/2024 20:46:47 - INFO - __main__ - Training/evaluation parameters ParlerTTSTrainingArguments(
+ _n_gpu=1,
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},
+ adafactor=False,
+ adam_beta1=0.9,
+ adam_beta2=0.99,
+ adam_epsilon=1e-08,
+ audio_encoder_per_device_batch_size=4,
+ auto_find_batch_size=False,
+ batch_eval_metrics=False,
+ bf16=False,
+ bf16_full_eval=False,
+ data_seed=None,
+ dataloader_drop_last=False,
+ dataloader_num_workers=4,
+ dataloader_persistent_workers=False,
+ dataloader_pin_memory=True,
+ dataloader_prefetch_factor=None,
+ ddp_backend=None,
+ ddp_broadcast_buffers=None,
+ ddp_bucket_cap_mb=None,
+ ddp_find_unused_parameters=None,
+ ddp_timeout=1800,
+ debug=[],
+ deepspeed=None,
+ disable_tqdm=False,
+ dispatch_batches=None,
+ do_eval=True,
+ do_predict=False,
+ do_train=True,
+ dtype=bfloat16,
+ eval_accumulation_steps=None,
+ eval_delay=0,
+ eval_do_concat_batches=True,
+ eval_steps=None,
+ eval_strategy=IntervalStrategy.EPOCH,
+ evaluation_strategy=epoch,
+ fp16=False,
+ fp16_backend=auto,
+ fp16_full_eval=False,
+ fp16_opt_level=O1,
+ fsdp=[],
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
+ fsdp_min_num_params=0,
+ fsdp_transformer_layer_cls_to_wrap=None,
+ full_determinism=False,
+ generation_config=None,
+ generation_max_length=None,
+ generation_num_beams=None,
+ gradient_accumulation_steps=8,
+ gradient_checkpointing=True,
+ gradient_checkpointing_kwargs=None,
+ greater_is_better=None,
+ group_by_length=True,
+ half_precision_backend=auto,
+ hub_always_push=False,
+ hub_model_id=None,
+ hub_private_repo=False,
+ hub_strategy=HubStrategy.EVERY_SAVE,
+ hub_token=<HUB_TOKEN>,
+ ignore_data_skip=False,
+ include_inputs_for_metrics=True,
+ include_num_input_tokens_seen=False,
+ include_tokens_per_second=False,
+ jit_mode_eval=False,
+ label_names=None,
+ label_smoothing_factor=0.0,
+ learning_rate=8e-05,
+ length_column_name=length,
+ load_best_model_at_end=False,
+ local_rank=0,
+ log_level=passive,
+ log_level_replica=warning,
+ log_on_each_node=True,
+ logging_dir=../output_dir_training_concat/runs/May13_20-46-43_hf-dgx-01,
+ logging_first_step=False,
+ logging_nan_inf_filter=True,
+ logging_steps=2,
+ logging_strategy=IntervalStrategy.STEPS,
+ lr_scheduler_kwargs={},
+ lr_scheduler_type=SchedulerType.COSINE,
+ max_grad_norm=1.0,
+ max_steps=-1,
+ metric_for_best_model=None,
+ mp_parameters=,
+ neftune_noise_alpha=None,
+ no_cuda=False,
+ num_train_epochs=8.0,
+ optim=OptimizerNames.ADAMW_TORCH,
+ optim_args=None,
+ optim_target_modules=None,
+ output_dir=../output_dir_training_concat/,
+ overwrite_output_dir=True,
+ past_index=-1,
+ per_device_eval_batch_size=16,
+ per_device_train_batch_size=16,
+ predict_with_generate=True,
+ prediction_loss_only=False,
+ push_to_hub=False,
+ push_to_hub_model_id=None,
+ push_to_hub_organization=None,
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
+ ray_scope=last,
+ remove_unused_columns=True,
+ report_to=['wandb'],
+ restore_callback_states_from_checkpoint=False,
+ resume_from_checkpoint=None,
+ run_name=../output_dir_training_concat/,
+ save_on_each_node=False,
+ save_only_model=False,
+ save_safetensors=True,
+ save_steps=72,
+ save_strategy=IntervalStrategy.STEPS,
+ save_total_limit=5,
+ seed=456,
+ skip_memory_metrics=True,
+ sortish_sampler=False,
+ split_batches=None,
+ tf32=None,
+ torch_compile=False,
+ torch_compile_backend=None,
+ torch_compile_mode=None,
+ torchdynamo=None,
+ tpu_metrics_debug=False,
+ tpu_num_cores=None,
+ use_cpu=False,
+ use_ipex=False,
+ use_legacy_prediction_loop=False,
+ use_mps_device=False,
+ warmup_ratio=0.0,
+ warmup_steps=250,
+ weight_decay=0.01,
+ )
+ 05/13/2024 20:46:48 - WARNING - __main__ - Disabling fast tokenizer warning: https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3231-L3235
+ loading configuration file preprocessor_config.json from cache at /raid/.cache/huggingface/models--parler-tts--dac_44khZ_8kbps/snapshots/db52bea859d9411e0beb44a3ea923a8731ee4197/preprocessor_config.json
+ Feature extractor EncodecFeatureExtractor {
+ "chunk_length_s": null,
+ "feature_extractor_type": "EncodecFeatureExtractor",
+ "feature_size": 1,
+ "overlap": null,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 44100
+ }
+ loading file spiece.model from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/spiece.model
+ loading file tokenizer.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer.json
+ loading file added_tokens.json from cache at None
+ loading file special_tokens_map.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/special_tokens_map.json
+ loading file tokenizer_config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer_config.json
+ You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
+ loading file spiece.model from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/spiece.model
+ loading file tokenizer.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer.json
+ loading file added_tokens.json from cache at None
+ loading file special_tokens_map.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/special_tokens_map.json
+ loading file tokenizer_config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer_config.json
+ Downloading readme: 100%|██████████████████████████████████████████████████████████████| 414/414 [00:00<00:00, 2.21MB/s]
+ Downloading readme: 0%| | 0.00/414 [00:00<?, ?B/s]
+
+
+
+
+ Generating train split: 100%|███████████████████████████████████████████████| 5347/5347 [00:10<00:00, 996.42 examples/s]
+ Downloading readme: 100%|██████████████████████████████████████████████████████████████| 470/470 [00:00<00:00, 1.99MB/s]
+ Combining datasets...: 0%| | 0/1 [03:44<?, ?it/s]
+ Traceback (most recent call last):
+ File "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py", line 1763, in <module>
+ main()
+ File "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py", line 950, in main
+ raw_datasets["train"] = load_multiple_datasets(
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py", line 693, in load_multiple_datasets
+ metadata_dataset = load_dataset(
+ ^^^^^^^^^^^^^
+ File "/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/datasets/load.py", line 2587, in load_dataset
+ builder_instance = load_dataset_builder(
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/datasets/load.py", line 2296, in load_dataset_builder
+ builder_instance: DatasetBuilder = builder_cls(
+ ^^^^^^^^^^^^
+ File "/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/datasets/builder.py", line 374, in __init__
+ self.config, self.config_id = self._create_builder_config(
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/datasets/builder.py", line 599, in _create_builder_config
+ raise ValueError(
+ ValueError: BuilderConfig 'read' not found. Available: ['default']
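Per the traceback, this run fails while loading the metadata dataset: the `read` config name passed on the command line is applied to a dataset that apparently only exposes a `default` config. A minimal sketch of surfacing the mismatch with the pinned `datasets` 2.19.1 API, using the metadata repo named in the run arguments:

```python
# Minimal sketch: list the configs the metadata dataset actually exposes,
# then load an existing config instead of the missing "read" one.
from datasets import get_dataset_config_names, load_dataset

repo = "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral"
print(get_dataset_config_names(repo))  # per the error above, e.g. ['default']

metadata = load_dataset(repo, "default", split="train")
```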
wandb/run-20240513_204644-y7fy6vtp/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
+ Babel==2.15.0
+ Flask-Cors==4.0.1
+ Flask-RESTful==0.3.10
+ Flask-SocketIO==5.3.6
+ Flask==2.2.5
+ GitPython==3.1.43
+ Jinja2==3.1.4
+ Markdown==3.6
+ MarkupSafe==2.1.5
+ PyYAML==6.0.1
+ Pygments==2.18.0
+ Send2Trash==1.8.3
+ Werkzeug==3.0.3
+ absl-py==2.1.0
+ accelerate==0.30.0
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ aniso8601==9.0.1
+ annotated-types==0.6.0
+ anyio==4.3.0
+ argbind==0.3.7
+ argon2-cffi-bindings==21.2.0
+ argon2-cffi==23.1.0
+ arrow==1.3.0
+ asttokens==2.4.1
+ async-lru==2.0.4
+ attrs==23.2.0
+ audioread==3.0.1
+ beautifulsoup4==4.12.3
+ bidict==0.23.1
+ bitsandbytes==0.43.1
+ bleach==6.1.0
+ certifi==2024.2.2
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ click==8.1.7
+ coloredlogs==14.0
+ comm==0.2.2
+ contourpy==1.2.1
+ cycler==0.12.1
+ datasets==2.19.1
+ debugpy==1.8.1
+ decorator==5.1.1
+ defusedxml==0.7.1
+ descript-audio-codec==1.0.0
+ descript-audiotools==0.7.2
+ dill==0.3.8
+ dnspython==2.3.0
+ docker-pycreds==0.4.0
+ docstring_parser==0.16
+ editdistance==0.8.1
+ einops==0.8.0
+ et-xmlfile==1.1.0
+ evaluate==0.4.2
+ eventlet==0.36.1
+ executing==2.0.1
+ fastjsonschema==2.19.1
+ ffmpy==0.3.2
+ filelock==3.14.0
+ fire==0.6.0
+ flask-talisman==1.1.0
+ flatten-dict==0.4.2
+ fonttools==4.51.0
+ fqdn==1.5.1
+ frozenlist==1.4.1
+ fsspec==2024.3.1
+ future==1.0.0
+ g2p==2.0.0
+ gitdb==4.0.11
+ greenlet==3.0.3
+ grpcio==1.63.0
+ h11==0.14.0
+ httpcore==1.0.5
+ httpx==0.27.0
+ huggingface-hub==0.23.0
+ humanfriendly==10.0
+ idna==3.7
+ importlib_resources==6.4.0
+ ipdb==0.13.13
+ ipykernel==6.29.4
+ ipython==8.24.0
+ isoduration==20.11.0
+ itsdangerous==2.2.0
+ jedi==0.19.1
+ jiwer==3.0.4
+ joblib==1.4.2
+ json5==0.9.25
+ jsonpointer==2.4
+ jsonschema-specifications==2023.12.1
+ jsonschema==4.22.0
+ julius==0.2.7
+ jupyter-events==0.10.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.1
+ jupyter_core==5.7.2
+ jupyter_server==2.14.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.2.0
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.1
+ kiwisolver==1.4.5
+ lazy_loader==0.4
+ librosa==0.10.2
+ llvmlite==0.42.0
+ markdown-it-py==3.0.0
+ markdown2==2.4.13
+ matplotlib-inline==0.1.7
+ matplotlib==3.8.4
+ mdurl==0.1.2
+ mistune==3.0.2
+ mpmath==1.3.0
+ msgpack==1.0.8
+ multidict==6.0.5
+ multiprocess==0.70.16
+ munkres==1.1.4
+ nbclient==0.10.0
+ nbconvert==7.16.4
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.3
+ notebook_shim==0.2.4
+ numba==0.59.1
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.20.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.1.105
+ openpyxl==3.1.2
+ overrides==7.7.0
+ packaging==24.0
+ pandas==2.2.2
+ pandocfilters==1.5.1
+ panphon==0.20.0
+ parler_tts==0.1
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==10.3.0
+ pip==24.0
+ platformdirs==4.2.1
+ pooch==1.8.1
+ prometheus_client==0.20.0
+ prompt-toolkit==3.0.43
+ protobuf==3.19.6
+ psutil==5.9.8
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pyarrow-hotfix==0.6
+ pyarrow==16.0.0
+ pycparser==2.22
+ pydantic==2.7.1
+ pydantic_core==2.18.2
+ pyloudnorm==0.1.1
+ pyparsing==3.1.2
+ pystoi==0.4.1
+ python-dateutil==2.9.0.post0
+ python-engineio==4.9.0
+ python-json-logger==2.0.7
+ python-socketio==5.11.2
+ pytz==2024.1
+ pyzmq==26.0.3
+ randomname==0.2.1
+ rapidfuzz==3.9.0
+ referencing==0.35.1
+ regex==2024.4.28
+ requests==2.31.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==13.7.1
+ rpds-py==0.18.1
+ safetensors==0.4.3
+ scikit-learn==1.4.2
+ scipy==1.13.0
+ sentencepiece==0.2.0
+ sentry-sdk==2.1.1
+ setproctitle==1.3.3
+ setuptools==69.5.1
+ simple-websocket==1.0.0
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.1
+ soundfile==0.12.1
+ soupsieve==2.5
+ soxr==0.3.7
+ stack-data==0.6.3
+ sympy==1.12
+ tensorboard-data-server==0.7.2
+ tensorboard==2.16.2
+ termcolor==2.4.0
+ terminado==0.18.1
+ text-unidecode==1.3
+ threadpoolctl==3.5.0
+ tinycss2==1.3.0
+ tokenizers==0.19.1
+ torch-stoi==0.2.1
+ torch==2.3.0
+ torchaudio==2.3.0
+ tornado==6.4
+ tqdm==4.66.4
+ traitlets==5.14.3
+ transformers==4.41.0.dev0
+ transformers==4.41.0.dev0
+ triton==2.3.0
+ types-python-dateutil==2.9.0.20240316
+ typing_extensions==4.11.0
+ tzdata==2024.1
+ unicodecsv==0.14.1
+ uri-template==1.3.0
+ urllib3==2.2.1
+ wandb==0.17.0
+ wcwidth==0.2.13
+ webcolors==1.13
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ wheel==0.43.0
+ wsproto==1.2.0
+ xxhash==3.4.1
+ yarl==1.9.4
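A minimal sketch of checking a few of these pins against the active interpreter using only the standard library; the three packages chosen are illustrative, not a complete audit:

```python
# Minimal sketch: compare installed versions against pins from requirements.txt.
from importlib.metadata import version

pins = {"torch": "2.3.0", "datasets": "2.19.1", "wandb": "0.17.0"}
for package, pinned in pins.items():
    installed = version(package)
    marker = "ok" if installed == pinned else f"mismatch (pinned {pinned})"
    print(f"{package}=={installed}  [{marker}]")
```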
wandb/run-20240513_204644-y7fy6vtp/files/wandb-metadata.json ADDED
@@ -0,0 +1,804 @@
+ {
+ "os": "Linux-5.4.0-166-generic-x86_64-with-glibc2.31",
+ "python": "3.11.9",
+ "heartbeatAt": "2024-05-13T18:46:44.664254",
+ "startedAt": "2024-05-13T18:46:44.142721",
+ "docker": null,
+ "cuda": null,
+ "args": [
+ "--model_name_or_path",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--feature_extractor_name",
+ "parler-tts/dac_44khZ_8kbps",
+ "--description_tokenizer_name",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--prompt_tokenizer_name",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--report_to",
+ "wandb",
+ "--overwrite_output_dir",
+ "true",
+ "--train_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal",
+ "--train_metadata_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral",
+ "--train_dataset_config_name",
+ "read",
+ "--train_split_name",
+ "train",
+ "--eval_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal",
+ "--eval_metadata_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral",
+ "--eval_dataset_config_name",
+ "read",
+ "--eval_split_name",
+ "train",
+ "--max_eval_samples",
+ "8",
+ "--per_device_eval_batch_size",
+ "16",
+ "--target_audio_column_name",
+ "audio",
+ "--description_column_name",
+ "text_description",
+ "--prompt_column_name",
+ "text",
+ "--max_duration_in_seconds",
+ "30.0",
+ "--min_duration_in_seconds",
+ "2.0",
+ "--max_text_length",
+ "400",
+ "--preprocessing_num_workers",
+ "2",
+ "--do_train",
+ "true",
+ "--num_train_epochs",
+ "8",
+ "--gradient_accumulation_steps",
+ "8",
+ "--gradient_checkpointing",
+ "true",
+ "--per_device_train_batch_size",
+ "16",
+ "--learning_rate",
+ "0.00008",
+ "--adam_beta1",
+ "0.9",
+ "--adam_beta2",
+ "0.99",
+ "--weight_decay",
+ "0.01",
+ "--lr_scheduler_type",
+ "cosine",
+ "--warmup_steps",
+ "250",
+ "--logging_steps",
+ "2",
+ "--freeze_text_encoder",
+ "true",
+ "--audio_encoder_per_device_batch_size",
+ "4",
+ "--dtype",
+ "bfloat16",
+ "--seed",
+ "456",
+ "--output_dir",
+ "../output_dir_training_concat/",
+ "--temporary_save_to_disk",
+ "../audio_code_tmp_concat/",
+ "--save_to_disk",
+ "../tmp_dataset_audio_concat/",
+ "--dataloader_num_workers",
+ "4",
+ "--do_eval",
+ "--predict_with_generate",
+ "--include_inputs_for_metrics",
+ "--save_strategy",
+ "steps",
+ "--save_steps",
+ "72",
+ "--evaluation_strategy",
+ "epoch",
+ "--save_total_limit",
+ "5",
+ "--group_by_length",
+ "true"
+ ],
+ "state": "running",
+ "program": "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py",
+ "codePathLocal": "run_parler_tts_training.py",
+ "codePath": "run_parler_tts_training.py",
+ "git": {
+ "remote": "https://huggingface.co/sanchit-gandhi/parler-tts-mini-v0.1-expresso-concatenated",
+ "commit": "99f75adb5e13ee0ad87cce8deb3e71adc370b10e"
+ },
+ "email": "sanchit@huggingface.co",
+ "root": "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated",
+ "host": "hf-dgx-01",
+ "username": "sanchit",
+ "executable": "/home/sanchit/miniconda3/envs/venv/bin/python",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "cpu_freq": {
+ "current": 2713.9304453124996,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ "cpu_freq_per_core": [
+ {
+ "current": 2076.148,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1783.251,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.241,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1796.012,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2273.163,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1749.542,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1747.968,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1749.532,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3151.024,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2454.223,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1717.418,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2440.313,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1726.936,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3285.322,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1665.33,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.033,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1666.499,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3313.14,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3148.73,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1667.633,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3314.333,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1665.824,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.97,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1669.416,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1711.485,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2435.786,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1712.451,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.291,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2260.754,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.396,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.495,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.727,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1715.159,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1666.66,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2327.712,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.046,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1715.048,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3233.973,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.251,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3198.61,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.522,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.503,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.921,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2449.66,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1668.915,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2568.264,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1668.642,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3314.602,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.343,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1710.688,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2995.061,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.724,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3259.362,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1711.982,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1723.149,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1704.811,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2298.41,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1732.061,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1697.486,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2387.266,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1791.846,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.417,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.03,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1792.18,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2259.304,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2465.862,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2463.001,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2459.093,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2997.527,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2331.651,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2591.488,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2443.283,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2415.125,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2461.32,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1885.452,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1808.205,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2520.778,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3303.109,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2475.55,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2536.741,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3148.399,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3332.648,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2543.883,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1667.131,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3332.767,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1666.858,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1666.153,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1665.151,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.213,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2507.482,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.066,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1670.247,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2279.452,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1712.054,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.383,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1711.275,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1715.372,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.004,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3181.039,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.611,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.271,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2529.667,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1714.294,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2456.025,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1712.501,
652
+ "min": 1500.0,
653
+ "max": 2250.0
654
+ },
655
+ {
656
+ "current": 1717.28,
657
+ "min": 1500.0,
658
+ "max": 2250.0
659
+ },
660
+ {
661
+ "current": 1712.863,
662
+ "min": 1500.0,
663
+ "max": 2250.0
664
+ },
665
+ {
666
+ "current": 2453.728,
667
+ "min": 1500.0,
668
+ "max": 2250.0
669
+ },
670
+ {
671
+ "current": 1666.803,
672
+ "min": 1500.0,
673
+ "max": 2250.0
674
+ },
675
+ {
676
+ "current": 1668.423,
677
+ "min": 1500.0,
678
+ "max": 2250.0
679
+ },
680
+ {
681
+ "current": 1666.817,
682
+ "min": 1500.0,
683
+ "max": 2250.0
684
+ },
685
+ {
686
+ "current": 3335.646,
687
+ "min": 1500.0,
688
+ "max": 2250.0
689
+ },
690
+ {
691
+ "current": 1664.711,
692
+ "min": 1500.0,
693
+ "max": 2250.0
694
+ },
695
+ {
696
+ "current": 1750.001,
697
+ "min": 1500.0,
698
+ "max": 2250.0
699
+ },
700
+ {
701
+ "current": 3334.823,
702
+ "min": 1500.0,
703
+ "max": 2250.0
704
+ },
705
+ {
706
+ "current": 1659.52,
707
+ "min": 1500.0,
708
+ "max": 2250.0
709
+ },
710
+ {
711
+ "current": 3342.577,
712
+ "min": 1500.0,
713
+ "max": 2250.0
714
+ },
715
+ {
716
+ "current": 1661.087,
717
+ "min": 1500.0,
718
+ "max": 2250.0
719
+ },
720
+ {
721
+ "current": 2210.973,
722
+ "min": 1500.0,
723
+ "max": 2250.0
724
+ },
725
+ {
726
+ "current": 1661.877,
727
+ "min": 1500.0,
728
+ "max": 2250.0
729
+ },
730
+ {
731
+ "current": 1929.122,
732
+ "min": 1500.0,
733
+ "max": 2250.0
734
+ },
735
+ {
736
+ "current": 2363.412,
737
+ "min": 1500.0,
738
+ "max": 2250.0
739
+ },
740
+ {
741
+ "current": 2012.647,
742
+ "min": 1500.0,
743
+ "max": 2250.0
744
+ },
745
+ {
746
+ "current": 3318.307,
747
+ "min": 1500.0,
748
+ "max": 2250.0
749
+ },
750
+ {
751
+ "current": 1792.637,
752
+ "min": 1500.0,
753
+ "max": 2250.0
754
+ },
755
+ {
756
+ "current": 1794.88,
757
+ "min": 1500.0,
758
+ "max": 2250.0
759
+ },
760
+ {
761
+ "current": 1795.26,
762
+ "min": 1500.0,
763
+ "max": 2250.0
764
+ },
765
+ {
766
+ "current": 1796.177,
767
+ "min": 1500.0,
768
+ "max": 2250.0
769
+ }
770
+ ],
771
+ "disk": {
772
+ "/": {
773
+ "total": 1757.8785285949707,
774
+ "used": 1663.5005645751953
775
+ }
776
+ },
777
+ "gpu": "NVIDIA A100-SXM4-80GB",
778
+ "gpu_count": 5,
779
+ "gpu_devices": [
780
+ {
781
+ "name": "NVIDIA A100-SXM4-80GB",
782
+ "memory_total": 85899345920
783
+ },
784
+ {
785
+ "name": "NVIDIA A100-SXM4-80GB",
786
+ "memory_total": 85899345920
787
+ },
788
+ {
789
+ "name": "NVIDIA A100-SXM4-80GB",
790
+ "memory_total": 85899345920
791
+ },
792
+ {
793
+ "name": "NVIDIA DGX Display",
794
+ "memory_total": 4294967296
795
+ },
796
+ {
797
+ "name": "NVIDIA A100-SXM4-80GB",
798
+ "memory_total": 85899345920
799
+ }
800
+ ],
801
+ "memory": {
802
+ "total": 503.5396919250488
803
+ }
804
+ }
wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb": {"runtime": 228}}
wandb/run-20240513_204644-y7fy6vtp/logs/debug-internal.log ADDED
@@ -0,0 +1,465 @@
+ 2024-05-13 20:46:44,150 INFO StreamThr :1244488 [internal.py:wandb_internal():85] W&B internal server running at pid: 1244488, started at: 2024-05-13 20:46:44.149628
+ 2024-05-13 20:46:44,152 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status
+ 2024-05-13 20:46:44,154 INFO WriterThread:1244488 [datastore.py:open_for_write():87] open: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/run-y7fy6vtp.wandb
+ 2024-05-13 20:46:44,156 DEBUG SenderThread:1244488 [sender.py:send():378] send: header
+ 2024-05-13 20:46:44,156 DEBUG SenderThread:1244488 [sender.py:send():378] send: run
+ 2024-05-13 20:46:44,526 INFO SenderThread:1244488 [dir_watcher.py:__init__():211] watching files in: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files
+ 2024-05-13 20:46:44,526 INFO SenderThread:1244488 [sender.py:_start_run_threads():1123] run started: y7fy6vtp with start time 1715626004.147997
+ 2024-05-13 20:46:44,532 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: check_version
+ 2024-05-13 20:46:44,532 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: check_version
+ 2024-05-13 20:46:44,596 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: run_start
+ 2024-05-13 20:46:44,626 DEBUG HandlerThread:1244488 [system_info.py:__init__():26] System info init
+ 2024-05-13 20:46:44,626 DEBUG HandlerThread:1244488 [system_info.py:__init__():41] System info init done
+ 2024-05-13 20:46:44,626 INFO HandlerThread:1244488 [system_monitor.py:start():194] Starting system monitor
+ 2024-05-13 20:46:44,626 INFO SystemMonitor:1244488 [system_monitor.py:_start():158] Starting system asset monitoring threads
+ 2024-05-13 20:46:44,627 INFO HandlerThread:1244488 [system_monitor.py:probe():214] Collecting system info
+ 2024-05-13 20:46:44,627 INFO SystemMonitor:1244488 [interfaces.py:start():188] Started cpu monitoring
+ 2024-05-13 20:46:44,628 INFO SystemMonitor:1244488 [interfaces.py:start():188] Started disk monitoring
+ 2024-05-13 20:46:44,628 INFO SystemMonitor:1244488 [interfaces.py:start():188] Started gpu monitoring
+ 2024-05-13 20:46:44,629 INFO SystemMonitor:1244488 [interfaces.py:start():188] Started memory monitoring
+ 2024-05-13 20:46:44,630 INFO SystemMonitor:1244488 [interfaces.py:start():188] Started network monitoring
+ 2024-05-13 20:46:44,664 DEBUG HandlerThread:1244488 [system_info.py:probe():150] Probing system
+ 2024-05-13 20:46:44,665 DEBUG HandlerThread:1244488 [system_info.py:_probe_git():135] Probing git
+ 2024-05-13 20:46:44,670 DEBUG HandlerThread:1244488 [system_info.py:_probe_git():143] Probing git done
+ 2024-05-13 20:46:44,670 DEBUG HandlerThread:1244488 [system_info.py:probe():198] Probing system done
+ 2024-05-13 20:46:44,670 DEBUG HandlerThread:1244488 [system_monitor.py:probe():223] {'os': 'Linux-5.4.0-166-generic-x86_64-with-glibc2.31', 'python': '3.11.9', 'heartbeatAt': '2024-05-13T18:46:44.664254', 'startedAt': '2024-05-13T18:46:44.142721', 'docker': None, 'cuda': None, 'args': ('--model_name_or_path', 'parler-tts/parler_tts_mini_v0.1', '--feature_extractor_name', 'parler-tts/dac_44khZ_8kbps', '--description_tokenizer_name', 'parler-tts/parler_tts_mini_v0.1', '--prompt_tokenizer_name', 'parler-tts/parler_tts_mini_v0.1', '--report_to', 'wandb', '--overwrite_output_dir', 'true', '--train_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal', '--train_metadata_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral', '--train_dataset_config_name', 'read', '--train_split_name', 'train', '--eval_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal', '--eval_metadata_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral', '--eval_dataset_config_name', 'read', '--eval_split_name', 'train', '--max_eval_samples', '8', '--per_device_eval_batch_size', '16', '--target_audio_column_name', 'audio', '--description_column_name', 'text_description', '--prompt_column_name', 'text', '--max_duration_in_seconds', '30.0', '--min_duration_in_seconds', '2.0', '--max_text_length', '400', '--preprocessing_num_workers', '2', '--do_train', 'true', '--num_train_epochs', '8', '--gradient_accumulation_steps', '8', '--gradient_checkpointing', 'true', '--per_device_train_batch_size', '16', '--learning_rate', '0.00008', '--adam_beta1', '0.9', '--adam_beta2', '0.99', '--weight_decay', '0.01', '--lr_scheduler_type', 'cosine', '--warmup_steps', '250', '--logging_steps', '2', '--freeze_text_encoder', 'true', '--audio_encoder_per_device_batch_size', '4', '--dtype', 'bfloat16', '--seed', '456', '--output_dir', '../output_dir_training_concat/', '--temporary_save_to_disk', '../audio_code_tmp_concat/', '--save_to_disk', '../tmp_dataset_audio_concat/', '--dataloader_num_workers', '4', '--do_eval', '--predict_with_generate', '--include_inputs_for_metrics', '--save_strategy', 'steps', '--save_steps', '72', '--evaluation_strategy', 'epoch', '--save_total_limit', '5', '--group_by_length', 'true'), 'state': 'running', 'program': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py', 'codePathLocal': 'run_parler_tts_training.py', 'codePath': 'run_parler_tts_training.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/parler-tts-mini-v0.1-expresso-concatenated', 'commit': '99f75adb5e13ee0ad87cce8deb3e71adc370b10e'}, 'email': 'sanchit@huggingface.co', 'root': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated', 'host': 'hf-dgx-01', 'username': 'sanchit', 'executable': '/home/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 64, 'cpu_count_logical': 128, 'cpu_freq': {'current': 2713.9304453124996, 'min': 1500.0, 'max': 2250.0}, 'cpu_freq_per_core': [{'current': 2076.148, 'min': 1500.0, 'max': 2250.0}, {'current': 1783.251, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.241, 'min': 1500.0, 'max': 2250.0}, {'current': 1796.012, 'min': 1500.0, 'max': 2250.0}, {'current': 2273.163, 'min': 1500.0, 'max': 2250.0}, {'current': 1749.542, 'min': 1500.0, 'max': 2250.0}, {'current': 1747.968, 'min': 1500.0, 'max': 2250.0}, {'current': 1749.532, 'min': 1500.0, 'max': 2250.0}, {'current': 3151.024, 'min': 1500.0, 'max': 2250.0}, {'current': 2454.223, 'min': 1500.0, 'max': 2250.0}, {'current': 1717.418, 'min': 1500.0, 'max': 2250.0}, {'current': 2440.313, 'min': 1500.0, 'max': 2250.0}, {'current': 1726.936, 'min': 1500.0, 'max': 2250.0}, {'current': 3285.322, 'min': 1500.0, 'max': 2250.0}, {'current': 1665.33, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.033, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.499, 'min': 1500.0, 'max': 2250.0}, {'current': 3313.14, 'min': 1500.0, 'max': 2250.0}, {'current': 3148.73, 'min': 1500.0, 'max': 2250.0}, {'current': 1667.633, 'min': 1500.0, 'max': 2250.0}, {'current': 3314.333, 'min': 1500.0, 'max': 2250.0}, {'current': 1665.824, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.97, 'min': 1500.0, 'max': 2250.0}, {'current': 1669.416, 'min': 1500.0, 'max': 2250.0}, {'current': 1711.485, 'min': 1500.0, 'max': 2250.0}, {'current': 2435.786, 'min': 1500.0, 'max': 2250.0}, {'current': 1712.451, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.291, 'min': 1500.0, 'max': 2250.0}, {'current': 2260.754, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.396, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.495, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.727, 'min': 1500.0, 'max': 2250.0}, {'current': 1715.159, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.66, 'min': 1500.0, 'max': 2250.0}, {'current': 2327.712, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.046, 'min': 1500.0, 'max': 2250.0}, {'current': 1715.048, 'min': 1500.0, 'max': 2250.0}, {'current': 3233.973, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.251, 'min': 1500.0, 'max': 2250.0}, {'current': 3198.61, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.522, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.503, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.921, 'min': 1500.0, 'max': 2250.0}, {'current': 2449.66, 'min': 1500.0, 'max': 2250.0}, {'current': 1668.915, 'min': 1500.0, 'max': 2250.0}, {'current': 2568.264, 'min': 1500.0, 'max': 2250.0}, {'current': 1668.642, 'min': 1500.0, 'max': 2250.0}, {'current': 3314.602, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.343, 'min': 1500.0, 'max': 2250.0}, {'current': 1710.688, 'min': 1500.0, 'max': 2250.0}, {'current': 2995.061, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.724, 'min': 1500.0, 'max': 2250.0}, {'current': 3259.362, 'min': 1500.0, 'max': 2250.0}, {'current': 1711.982, 'min': 1500.0, 'max': 2250.0}, {'current': 1723.149, 'min': 1500.0, 'max': 2250.0}, {'current': 1704.811, 'min': 1500.0, 'max': 2250.0}, {'current': 2298.41, 'min': 1500.0, 'max': 2250.0}, {'current': 1732.061, 'min': 1500.0, 'max': 2250.0}, {'current': 1697.486, 'min': 1500.0, 'max': 2250.0}, {'current': 2387.266, 'min': 1500.0, 'max': 2250.0}, {'current': 1791.846, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.417, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.03, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.18, 'min': 1500.0, 'max': 2250.0}, {'current': 2259.304, 'min': 1500.0, 'max': 2250.0}, {'current': 2465.862, 'min': 1500.0, 'max': 2250.0}, {'current': 2463.001, 'min': 1500.0, 'max': 2250.0}, {'current': 2459.093, 'min': 1500.0, 'max': 2250.0}, {'current': 2997.527, 'min': 1500.0, 'max': 2250.0}, {'current': 2331.651, 'min': 1500.0, 'max': 2250.0}, {'current': 2591.488, 'min': 1500.0, 'max': 2250.0}, {'current': 2443.283, 'min': 1500.0, 'max': 2250.0}, {'current': 2415.125, 'min': 1500.0, 'max': 2250.0}, {'current': 2461.32, 'min': 1500.0, 'max': 2250.0}, {'current': 1885.452, 'min': 1500.0, 'max': 2250.0}, {'current': 1808.205, 'min': 1500.0, 'max': 2250.0}, {'current': 2520.778, 'min': 1500.0, 'max': 2250.0}, {'current': 3303.109, 'min': 1500.0, 'max': 2250.0}, {'current': 2475.55, 'min': 1500.0, 'max': 2250.0}, {'current': 2536.741, 'min': 1500.0, 'max': 2250.0}, {'current': 3148.399, 'min': 1500.0, 'max': 2250.0}, {'current': 3332.648, 'min': 1500.0, 'max': 2250.0}, {'current': 2543.883, 'min': 1500.0, 'max': 2250.0}, {'current': 1667.131, 'min': 1500.0, 'max': 2250.0}, {'current': 3332.767, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.858, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.153, 'min': 1500.0, 'max': 2250.0}, {'current': 1665.151, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.213, 'min': 1500.0, 'max': 2250.0}, {'current': 2507.482, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.066, 'min': 1500.0, 'max': 2250.0}, {'current': 1670.247, 'min': 1500.0, 'max': 2250.0}, {'current': 2279.452, 'min': 1500.0, 'max': 2250.0}, {'current': 1712.054, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.383, 'min': 1500.0, 'max': 2250.0}, {'current': 1711.275, 'min': 1500.0, 'max': 2250.0}, {'current': 1715.372, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.004, 'min': 1500.0, 'max': 2250.0}, {'current': 3181.039, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.611, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.271, 'min': 1500.0, 'max': 2250.0}, {'current': 2529.667, 'min': 1500.0, 'max': 2250.0}, {'current': 1714.294, 'min': 1500.0, 'max': 2250.0}, {'current': 2456.025, 'min': 1500.0, 'max': 2250.0}, {'current': 1712.501, 'min': 1500.0, 'max': 2250.0}, {'current': 1717.28, 'min': 1500.0, 'max': 2250.0}, {'current': 1712.863, 'min': 1500.0, 'max': 2250.0}, {'current': 2453.728, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.803, 'min': 1500.0, 'max': 2250.0}, {'current': 1668.423, 'min': 1500.0, 'max': 2250.0}, {'current': 1666.817, 'min': 1500.0, 'max': 2250.0}, {'current': 3335.646, 'min': 1500.0, 'max': 2250.0}, {'current': 1664.711, 'min': 1500.0, 'max': 2250.0}, {'current': 1750.001, 'min': 1500.0, 'max': 2250.0}, {'current': 3334.823, 'min': 1500.0, 'max': 2250.0}, {'current': 1659.52, 'min': 1500.0, 'max': 2250.0}, {'current': 3342.577, 'min': 1500.0, 'max': 2250.0}, {'current': 1661.087, 'min': 1500.0, 'max': 2250.0}, {'current': 2210.973, 'min': 1500.0, 'max': 2250.0}, {'current': 1661.877, 'min': 1500.0, 'max': 2250.0}, {'current': 1929.122, 'min': 1500.0, 'max': 2250.0}, {'current': 2363.412, 'min': 1500.0, 'max': 2250.0}, {'current': 2012.647, 'min': 1500.0, 'max': 2250.0}, {'current': 3318.307, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.637, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.88, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.26, 'min': 1500.0, 'max': 2250.0}, {'current': 1796.177, 'min': 1500.0, 'max': 2250.0}], 'disk': {'/': {'total': 1757.8785285949707, 'used': 1663.5005645751953}}, 'gpu': 'NVIDIA A100-SXM4-80GB', 'gpu_count': 5, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA DGX Display', 'memory_total': 4294967296}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}], 'memory': {'total': 503.5396919250488}}
+ 2024-05-13 20:46:44,671 INFO HandlerThread:1244488 [system_monitor.py:probe():224] Finished collecting system info
+ 2024-05-13 20:46:44,671 INFO HandlerThread:1244488 [system_monitor.py:probe():227] Publishing system info
+ 2024-05-13 20:46:44,671 DEBUG HandlerThread:1244488 [system_info.py:_save_conda():207] Saving list of conda packages installed into the current environment
+ 2024-05-13 20:46:44,685 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:44,700 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:45,528 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml
+ 2024-05-13 20:46:46,722 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:46,733 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:47,041 DEBUG HandlerThread:1244488 [system_info.py:_save_conda():222] Saving conda packages done
+ 2024-05-13 20:46:47,042 INFO HandlerThread:1244488 [system_monitor.py:probe():229] Finished publishing system info
+ 2024-05-13 20:46:47,050 DEBUG SenderThread:1244488 [sender.py:send():378] send: files
+ 2024-05-13 20:46:47,051 INFO SenderThread:1244488 [sender.py:_save_file():1389] saving file wandb-metadata.json with policy now
+ 2024-05-13 20:46:47,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: python_packages
+ 2024-05-13 20:46:47,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:46:47,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: python_packages
+ 2024-05-13 20:46:47,167 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:46:47,399 DEBUG SenderThread:1244488 [sender.py:send():378] send: telemetry
+ 2024-05-13 20:46:47,399 DEBUG SenderThread:1244488 [sender.py:send():378] send: config
+ 2024-05-13 20:46:47,527 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml
+ 2024-05-13 20:46:47,527 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-metadata.json
+ 2024-05-13 20:46:47,527 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/requirements.txt
+ 2024-05-13 20:46:47,527 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:46:47,577 INFO wandb-upload_0:1244488 [upload_job.py:push():130] Uploaded file /tmp/tmpub0aqma1wandb/xhoqu8pi-wandb-metadata.json
+ 2024-05-13 20:46:49,371 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:46:49,527 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:46:49,761 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:49,774 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:51,528 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:46:51,799 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:51,809 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:54,372 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:46:54,928 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:54,950 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:56,968 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:56,979 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:46:59,372 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:46:59,995 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:00,017 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:02,037 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:02,060 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:02,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:47:02,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:47:05,234 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:05,248 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:05,342 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:07,294 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:07,305 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:10,342 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:10,471 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:10,483 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:12,502 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:12,511 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:14,539 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:15,347 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:15,535 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/config.yaml
+ 2024-05-13 20:47:15,597 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:17,163 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:47:17,164 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:47:17,617 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:17,627 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:20,776 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:20,799 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:21,373 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:24,480 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:47:24,503 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:24,513 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:26,374 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:26,539 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:26,549 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:29,415 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:29,435 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:31,375 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:31,471 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:31,494 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:32,163 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:47:32,164 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:47:34,504 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:34,522 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:36,590 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:36,609 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:37,373 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:38,669 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:38,682 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:41,557 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:41,584 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:42,374 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:43,631 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:43,652 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:44,630 DEBUG SystemMonitor:1244488 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+ 2024-05-13 20:47:44,634 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:47:46,570 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:46,604 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:47,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:47:47,164 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:47:48,372 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:48,627 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:48,647 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:51,667 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:51,686 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:53,373 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:53,731 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:53,745 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:56,603 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:56,614 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:58,374 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:47:58,651 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:47:58,660 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:02,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:48:02,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:48:02,441 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:48:03,327 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:03,354 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:03,406 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:05,386 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:05,421 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:08,407 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:08,479 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:08,498 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:10,520 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:10,533 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:13,408 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:13,506 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:13,529 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:14,637 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:48:15,573 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:15,595 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:17,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:48:17,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:48:18,553 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:18,576 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:19,398 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:20,630 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:20,645 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:23,668 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:23,707 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:24,399 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:25,750 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:25,769 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:28,690 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:28,701 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:29,400 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:30,752 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:30,766 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:32,164 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:48:32,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:48:33,779 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:33,801 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:35,390 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:35,833 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:35,857 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:38,883 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:38,904 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:40,391 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:42,674 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:48:42,717 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:42,728 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:44,638 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:48:45,640 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:45,792 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:45,830 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:47,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:48:47,166 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:48:47,862 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:47,885 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:50,784 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:50,806 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:51,392 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:52,825 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:52,839 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:55,925 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:55,958 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:56,393 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:48:57,986 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:48:58,004 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:01,057 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:01,082 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:01,394 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:02,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:49:02,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:49:03,103 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:03,126 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:06,046 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:06,069 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:07,371 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:08,089 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:08,099 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:11,174 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:11,186 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:12,372 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:13,203 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:13,213 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:14,641 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:49:16,181 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:16,195 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:17,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:49:17,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:49:18,217 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:18,228 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:18,360 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:21,690 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:49:21,712 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:21,934 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:23,361 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:24,088 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:24,097 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:26,124 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:26,134 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:28,362 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:28,618 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:28,634 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:30,672 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:30,685 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:32,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:49:32,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:49:33,142 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:33,160 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:33,398 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:35,183 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:35,200 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:37,936 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:37,992 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:38,398 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:40,036 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:40,045 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:42,607 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:42,624 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:43,399 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:44,643 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:49:44,677 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:44,693 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:47,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:49:47,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:49:47,255 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:47,292 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:49,311 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:49,328 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:49,336 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:51,888 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:51,906 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:54,329 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:49:54,371 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:54,403 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:58,505 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:49:59,014 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:59,023 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:49:59,330 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:01,042 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:01,076 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:02,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:50:02,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:50:03,671 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:03,687 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:04,400 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:05,735 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:05,745 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:08,359 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:08,383 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:09,405 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:10,443 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:10,463 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:13,159 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:13,196 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:14,405 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:14,646 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:50:15,219 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:15,230 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:17,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:50:17,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:50:17,757 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:17,769 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:19,788 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:19,798 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:20,116 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:21,592 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:22,219 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:22,235 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:24,253 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:24,262 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:25,150 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:25,593 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:26,744 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:26,754 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:27,593 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:28,771 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:28,784 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:29,594 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:30,834 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:50:31,305 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:31,314 ERROR gpu :1244488 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:50:31,594 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:32,165 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:50:32,165 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:50:33,134 DEBUG SenderThread:1244488 [sender.py:send():378] send: exit
+ 2024-05-13 20:50:33,134 INFO SenderThread:1244488 [sender.py:send_exit():585] handling exit code: 1
+ 2024-05-13 20:50:33,134 INFO SenderThread:1244488 [sender.py:send_exit():587] handling runtime: 228
+ 2024-05-13 20:50:33,134 INFO SenderThread:1244488 [sender.py:_save_file():1389] saving file wandb-summary.json with policy end
+ 2024-05-13 20:50:33,134 INFO SenderThread:1244488 [sender.py:send_exit():593] send defer
+ 2024-05-13 20:50:33,135 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:50:33,135 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 0
+ 2024-05-13 20:50:33,135 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:50:33,135 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 0
+ 2024-05-13 20:50:33,135 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 1
+ 2024-05-13 20:50:33,135 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:50:33,135 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 1
+ 2024-05-13 20:50:33,135 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:50:33,135 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 1
+ 2024-05-13 20:50:33,135 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 2
+ 2024-05-13 20:50:33,135 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:50:33,135 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 2
+ 2024-05-13 20:50:33,136 INFO HandlerThread:1244488 [system_monitor.py:finish():203] Stopping system monitor
+ 2024-05-13 20:50:33,136 DEBUG SystemMonitor:1244488 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+ 2024-05-13 20:50:33,136 DEBUG SystemMonitor:1244488 [system_monitor.py:_start():183] Publishing last batch of metrics
+ 2024-05-13 20:50:33,139 INFO HandlerThread:1244488 [interfaces.py:finish():200] Joined cpu monitor
+ 2024-05-13 20:50:33,139 INFO HandlerThread:1244488 [interfaces.py:finish():200] Joined disk monitor
+ 2024-05-13 20:50:33,595 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
+ 2024-05-13 20:50:33,595 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json
+ 2024-05-13 20:50:34,732 ERROR gpu :1244488 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:50:34,732 INFO HandlerThread:1244488 [interfaces.py:finish():200] Joined gpu monitor
+ 2024-05-13 20:50:34,732 INFO HandlerThread:1244488 [interfaces.py:finish():200] Joined memory monitor
+ 2024-05-13 20:50:34,733 INFO HandlerThread:1244488 [interfaces.py:finish():200] Joined network monitor
+ 2024-05-13 20:50:34,733 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:50:34,734 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:50:34,734 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 2
+ 2024-05-13 20:50:34,735 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 3
+ 2024-05-13 20:50:34,735 DEBUG SenderThread:1244488 [sender.py:send():378] send: stats
+ 2024-05-13 20:50:34,736 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:50:34,736 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
353
+ 2024-05-13 20:50:34,736 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 3
354
+ 2024-05-13 20:50:34,736 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
355
+ 2024-05-13 20:50:34,736 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 3
356
+ 2024-05-13 20:50:34,736 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 4
357
+ 2024-05-13 20:50:34,736 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
358
+ 2024-05-13 20:50:34,736 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 4
359
+ 2024-05-13 20:50:34,736 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
360
+ 2024-05-13 20:50:34,737 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 4
361
+ 2024-05-13 20:50:34,737 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 5
362
+ 2024-05-13 20:50:34,737 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
363
+ 2024-05-13 20:50:34,737 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 5
364
+ 2024-05-13 20:50:34,737 DEBUG SenderThread:1244488 [sender.py:send():378] send: summary
365
+ 2024-05-13 20:50:34,737 INFO SenderThread:1244488 [sender.py:_save_file():1389] saving file wandb-summary.json with policy end
366
+ 2024-05-13 20:50:34,737 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
367
+ 2024-05-13 20:50:34,737 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 5
368
+ 2024-05-13 20:50:34,737 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 6
369
+ 2024-05-13 20:50:34,738 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
370
+ 2024-05-13 20:50:34,738 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 6
371
+ 2024-05-13 20:50:34,738 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
372
+ 2024-05-13 20:50:34,738 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 6
373
+ 2024-05-13 20:50:34,738 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 7
374
+ 2024-05-13 20:50:34,738 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: status_report
375
+ 2024-05-13 20:50:34,738 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
376
+ 2024-05-13 20:50:34,738 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 7
377
+ 2024-05-13 20:50:34,738 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
378
+ 2024-05-13 20:50:34,738 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 7
379
+ 2024-05-13 20:50:35,134 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
380
+ 2024-05-13 20:50:35,540 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 8
381
+ 2024-05-13 20:50:35,540 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
382
+ 2024-05-13 20:50:35,540 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
383
+ 2024-05-13 20:50:35,541 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 8
384
+ 2024-05-13 20:50:35,541 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
385
+ 2024-05-13 20:50:35,541 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 8
386
+ 2024-05-13 20:50:35,541 INFO SenderThread:1244488 [job_builder.py:build():432] Attempting to build job artifact
387
+ 2024-05-13 20:50:35,541 INFO SenderThread:1244488 [job_builder.py:_get_source_type():565] is repo sourced job
388
+ 2024-05-13 20:50:35,565 INFO SenderThread:1244488 [job_builder.py:build():541] adding wandb-job metadata file
389
+ 2024-05-13 20:50:35,566 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 9
390
+ 2024-05-13 20:50:35,567 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
391
+ 2024-05-13 20:50:35,567 DEBUG SenderThread:1244488 [sender.py:send():378] send: artifact
392
+ 2024-05-13 20:50:35,567 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 9
393
+ 2024-05-13 20:50:35,596 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json
394
+ 2024-05-13 20:50:35,596 INFO Thread-12 :1244488 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
395
+ 2024-05-13 20:50:36,135 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
396
+ 2024-05-13 20:50:36,629 INFO wandb-upload_1:1244488 [upload_job.py:push():88] Uploaded file /tmp/tmpnjo96axd/wandb-job.json
397
+ 2024-05-13 20:50:36,674 INFO wandb-upload_0:1244488 [upload_job.py:push():88] Uploaded file /home/sanchit/.local/share/wandb/artifacts/staging/tmp8rqc3i5s
398
+ 2024-05-13 20:50:37,629 INFO SenderThread:1244488 [sender.py:send_artifact():1467] sent artifact job-https___huggingface.co_sanchit-gandhi_parler-tts-mini-v0.1-expresso-concatenated_run_parler_tts_training.py - {'id': 'QXJ0aWZhY3Q6ODM0NzI5Njgw', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjE3NDIzMTI1Mw==', 'latestArtifact': None}}
399
+ 2024-05-13 20:50:37,629 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
400
+ 2024-05-13 20:50:37,629 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 9
401
+ 2024-05-13 20:50:37,630 INFO SenderThread:1244488 [dir_watcher.py:finish():358] shutting down directory watcher
402
+ 2024-05-13 20:50:38,597 INFO SenderThread:1244488 [dir_watcher.py:finish():388] scan: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files
403
+ 2024-05-13 20:50:38,597 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml conda-environment.yaml
404
+ 2024-05-13 20:50:38,597 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json wandb-summary.json
405
+ 2024-05-13 20:50:38,597 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log output.log
406
+ 2024-05-13 20:50:38,597 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/requirements.txt requirements.txt
407
+ 2024-05-13 20:50:38,601 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/config.yaml config.yaml
408
+ 2024-05-13 20:50:38,603 INFO SenderThread:1244488 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-metadata.json wandb-metadata.json
409
+ 2024-05-13 20:50:38,604 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 10
410
+ 2024-05-13 20:50:38,606 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
411
+ 2024-05-13 20:50:38,608 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
412
+ 2024-05-13 20:50:38,609 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 10
413
+ 2024-05-13 20:50:38,609 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
414
+ 2024-05-13 20:50:38,609 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 10
415
+ 2024-05-13 20:50:38,609 INFO SenderThread:1244488 [file_pusher.py:finish():169] shutting down file pusher
416
+ 2024-05-13 20:50:38,936 INFO wandb-upload_0:1244488 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/wandb-summary.json
417
+ 2024-05-13 20:50:38,945 INFO wandb-upload_1:1244488 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/conda-environment.yaml
418
+ 2024-05-13 20:50:39,053 INFO wandb-upload_3:1244488 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/requirements.txt
419
+ 2024-05-13 20:50:39,066 INFO wandb-upload_2:1244488 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/output.log
420
+ 2024-05-13 20:50:39,072 INFO wandb-upload_4:1244488 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/files/config.yaml
421
+ 2024-05-13 20:50:39,136 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
422
+ 2024-05-13 20:50:39,136 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
423
+ 2024-05-13 20:50:39,272 INFO Thread-11 (_thread_body):1244488 [sender.py:transition_state():613] send defer: 11
424
+ 2024-05-13 20:50:39,273 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
425
+ 2024-05-13 20:50:39,273 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 11
426
+ 2024-05-13 20:50:39,273 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
427
+ 2024-05-13 20:50:39,273 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 11
428
+ 2024-05-13 20:50:39,273 INFO SenderThread:1244488 [file_pusher.py:join():175] waiting for file pusher
429
+ 2024-05-13 20:50:39,273 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 12
430
+ 2024-05-13 20:50:39,274 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
431
+ 2024-05-13 20:50:39,274 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 12
432
+ 2024-05-13 20:50:39,274 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
433
+ 2024-05-13 20:50:39,274 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 12
434
+ 2024-05-13 20:50:39,274 INFO SenderThread:1244488 [file_stream.py:finish():601] file stream finish called
435
+ 2024-05-13 20:50:39,402 INFO SenderThread:1244488 [file_stream.py:finish():605] file stream finish is done
436
+ 2024-05-13 20:50:39,402 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 13
437
+ 2024-05-13 20:50:39,402 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
438
+ 2024-05-13 20:50:39,402 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 13
439
+ 2024-05-13 20:50:39,403 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
440
+ 2024-05-13 20:50:39,403 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 13
441
+ 2024-05-13 20:50:39,403 INFO SenderThread:1244488 [sender.py:transition_state():613] send defer: 14
442
+ 2024-05-13 20:50:39,403 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: defer
443
+ 2024-05-13 20:50:39,403 INFO HandlerThread:1244488 [handler.py:handle_request_defer():184] handle defer: 14
444
+ 2024-05-13 20:50:39,403 DEBUG SenderThread:1244488 [sender.py:send():378] send: final
445
+ 2024-05-13 20:50:39,403 DEBUG SenderThread:1244488 [sender.py:send():378] send: footer
446
+ 2024-05-13 20:50:39,403 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: defer
447
+ 2024-05-13 20:50:39,403 INFO SenderThread:1244488 [sender.py:send_request_defer():609] handle sender defer: 14
448
+ 2024-05-13 20:50:39,404 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
449
+ 2024-05-13 20:50:39,404 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
450
+ 2024-05-13 20:50:39,405 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: poll_exit
451
+ 2024-05-13 20:50:39,405 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: poll_exit
452
+ 2024-05-13 20:50:39,405 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: server_info
453
+ 2024-05-13 20:50:39,406 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: get_summary
454
+ 2024-05-13 20:50:39,406 DEBUG SenderThread:1244488 [sender.py:send_request():405] send_request: server_info
455
+ 2024-05-13 20:50:39,408 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: sampled_history
456
+ 2024-05-13 20:50:39,408 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: internal_messages
457
+ 2024-05-13 20:50:39,538 INFO MainThread:1244488 [wandb_run.py:_footer_history_summary_info():3994] rendering history
458
+ 2024-05-13 20:50:39,538 INFO MainThread:1244488 [wandb_run.py:_footer_history_summary_info():4026] rendering summary
459
+ 2024-05-13 20:50:39,538 INFO MainThread:1244488 [wandb_run.py:_footer_sync_info():3953] logging synced files
460
+ 2024-05-13 20:50:39,539 DEBUG HandlerThread:1244488 [handler.py:handle_request():158] handle_request: shutdown
461
+ 2024-05-13 20:50:39,539 INFO HandlerThread:1244488 [handler.py:finish():882] shutting down handler
462
+ 2024-05-13 20:50:40,406 INFO WriterThread:1244488 [datastore.py:close():296] close: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/run-y7fy6vtp.wandb
463
+ 2024-05-13 20:50:40,538 INFO SenderThread:1244488 [sender.py:finish():1545] shutting down sender
464
+ 2024-05-13 20:50:40,538 INFO SenderThread:1244488 [file_pusher.py:finish():169] shutting down file pusher
465
+ 2024-05-13 20:50:40,538 INFO SenderThread:1244488 [file_pusher.py:join():175] waiting for file pusher
wandb/run-20240513_204644-y7fy6vtp/logs/debug.log ADDED
@@ -0,0 +1,29 @@
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Configure stats pid to 1244392
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Loading settings from /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/settings
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_parler_tts_training.py', 'program_abspath': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/run_parler_tts_training.py', 'program': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py'}
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_setup.py:_flush():76] Applying login settings: {}
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_init.py:_log_setup():520] Logging user logs to /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/logs/debug.log
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_init.py:_log_setup():521] Logging internal logs to /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_204644-y7fy6vtp/logs/debug-internal.log
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_init.py:init():560] calling init triggers
+ 2024-05-13 20:46:44,143 INFO MainThread:1244392 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
+ config: {}
+ 2024-05-13 20:46:44,144 INFO MainThread:1244392 [wandb_init.py:init():610] starting backend
+ 2024-05-13 20:46:44,144 INFO MainThread:1244392 [wandb_init.py:init():614] setting up manager
+ 2024-05-13 20:46:44,147 INFO MainThread:1244392 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2024-05-13 20:46:44,147 INFO MainThread:1244392 [wandb_init.py:init():622] backend started and connected
+ 2024-05-13 20:46:44,149 INFO MainThread:1244392 [wandb_init.py:init():711] updated telemetry
+ 2024-05-13 20:46:44,152 INFO MainThread:1244392 [wandb_init.py:init():744] communicating run to backend with 90.0 second timeout
+ 2024-05-13 20:46:44,531 INFO MainThread:1244392 [wandb_run.py:_on_init():2396] communicating current version
+ 2024-05-13 20:46:44,589 INFO MainThread:1244392 [wandb_run.py:_on_init():2405] got version response
+ 2024-05-13 20:46:44,590 INFO MainThread:1244392 [wandb_init.py:init():795] starting run threads in backend
+ 2024-05-13 20:46:47,163 INFO MainThread:1244392 [wandb_run.py:_console_start():2374] atexit reg
+ 2024-05-13 20:46:47,164 INFO MainThread:1244392 [wandb_run.py:_redirect():2229] redirect: wrap_raw
+ 2024-05-13 20:46:47,164 INFO MainThread:1244392 [wandb_run.py:_redirect():2294] Wrapping output streams.
+ 2024-05-13 20:46:47,164 INFO MainThread:1244392 [wandb_run.py:_redirect():2319] Redirects installed.
+ 2024-05-13 20:46:47,165 INFO MainThread:1244392 [wandb_init.py:init():838] run started, returning control to user process
+ 2024-05-13 20:46:47,165 INFO MainThread:1244392 [wandb_run.py:_config_callback():1376] config_cb None None {'learning_rate': 8e-05, 'model_name_or_path': 'parler-tts/parler_tts_mini_v0.1', 'num_train_epochs': 8.0, 'gradient_accumulation_steps': 8, 'per_device_train_batch_size': 16, 'global_batch_size': 16, 'mixed_precision': 'bf16', 'lr_scheduler_type': 'SchedulerType.COSINE', 'warmup_steps': 250, 'freeze_text_encoder': True, 'max_duration_in_seconds': 30.0, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.99, 'temperature': 1.0}
+ 2024-05-13 20:50:40,539 WARNING MsgRouterThr:1244392 [router.py:message_loop():77] message_loop has been closed
wandb/run-20240513_204644-y7fy6vtp/run-y7fy6vtp.wandb ADDED
Binary file (59.3 kB).
 
wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml ADDED
@@ -0,0 +1,248 @@
+ name: venv
+ channels:
+ - defaults
+ dependencies:
+ - _libgcc_mutex=0.1=main
+ - _openmp_mutex=5.1=1_gnu
+ - bzip2=1.0.8=h5eee18b_6
+ - ca-certificates=2024.3.11=h06a4308_0
+ - ld_impl_linux-64=2.38=h1181459_1
+ - libffi=3.4.4=h6a678d5_1
+ - libgcc-ng=11.2.0=h1234567_1
+ - libgomp=11.2.0=h1234567_1
+ - libstdcxx-ng=11.2.0=h1234567_1
+ - libuuid=1.41.5=h5eee18b_0
+ - ncurses=6.4=h6a678d5_0
+ - openssl=3.0.13=h7f8727e_1
+ - pip=24.0=py311h06a4308_0
+ - python=3.11.9=h955ad1f_0
+ - readline=8.2=h5eee18b_0
+ - setuptools=69.5.1=py311h06a4308_0
+ - sqlite=3.45.3=h5eee18b_0
+ - tk=8.6.14=h39e8969_0
+ - wheel=0.43.0=py311h06a4308_0
+ - xz=5.4.6=h5eee18b_1
+ - zlib=1.2.13=h5eee18b_1
+ - pip:
+ - absl-py==2.1.0
+ - accelerate==0.30.0
+ - aiohttp==3.9.5
+ - aiosignal==1.3.1
+ - aniso8601==9.0.1
+ - annotated-types==0.6.0
+ - anyio==4.3.0
+ - argbind==0.3.7
+ - argon2-cffi==23.1.0
+ - argon2-cffi-bindings==21.2.0
+ - arrow==1.3.0
+ - asttokens==2.4.1
+ - async-lru==2.0.4
+ - attrs==23.2.0
+ - audioread==3.0.1
+ - babel==2.15.0
+ - beautifulsoup4==4.12.3
+ - bidict==0.23.1
+ - bitsandbytes==0.43.1
+ - bleach==6.1.0
+ - certifi==2024.2.2
+ - cffi==1.16.0
+ - charset-normalizer==3.3.2
+ - click==8.1.7
+ - coloredlogs==14.0
+ - comm==0.2.2
+ - contourpy==1.2.1
+ - cycler==0.12.1
+ - datasets==2.19.1
+ - debugpy==1.8.1
+ - decorator==5.1.1
+ - defusedxml==0.7.1
+ - descript-audio-codec==1.0.0
+ - descript-audiotools==0.7.2
+ - dill==0.3.8
+ - dnspython==2.3.0
+ - docker-pycreds==0.4.0
+ - docstring-parser==0.16
+ - editdistance==0.8.1
+ - einops==0.8.0
+ - et-xmlfile==1.1.0
+ - evaluate==0.4.2
+ - eventlet==0.36.1
+ - executing==2.0.1
+ - fastjsonschema==2.19.1
+ - ffmpy==0.3.2
+ - filelock==3.14.0
+ - fire==0.6.0
+ - flask==2.2.5
+ - flask-cors==4.0.1
+ - flask-restful==0.3.10
+ - flask-socketio==5.3.6
+ - flask-talisman==1.1.0
+ - flatten-dict==0.4.2
+ - fonttools==4.51.0
+ - fqdn==1.5.1
+ - frozenlist==1.4.1
+ - fsspec==2024.3.1
+ - future==1.0.0
+ - g2p==2.0.0
+ - gitdb==4.0.11
+ - gitpython==3.1.43
+ - greenlet==3.0.3
+ - grpcio==1.63.0
+ - h11==0.14.0
+ - httpcore==1.0.5
+ - httpx==0.27.0
+ - huggingface-hub==0.23.0
+ - humanfriendly==10.0
+ - idna==3.7
+ - importlib-resources==6.4.0
+ - ipdb==0.13.13
+ - ipykernel==6.29.4
+ - ipython==8.24.0
+ - isoduration==20.11.0
+ - itsdangerous==2.2.0
+ - jedi==0.19.1
+ - jinja2==3.1.4
+ - jiwer==3.0.4
+ - joblib==1.4.2
+ - json5==0.9.25
+ - jsonpointer==2.4
+ - jsonschema==4.22.0
+ - jsonschema-specifications==2023.12.1
+ - julius==0.2.7
+ - jupyter-client==8.6.1
+ - jupyter-core==5.7.2
+ - jupyter-events==0.10.0
+ - jupyter-lsp==2.2.5
+ - jupyter-server==2.14.0
+ - jupyter-server-terminals==0.5.3
+ - jupyterlab==4.2.0
+ - jupyterlab-pygments==0.3.0
+ - jupyterlab-server==2.27.1
+ - kiwisolver==1.4.5
+ - lazy-loader==0.4
+ - librosa==0.10.2
+ - llvmlite==0.42.0
+ - markdown==3.6
+ - markdown-it-py==3.0.0
+ - markdown2==2.4.13
+ - markupsafe==2.1.5
+ - matplotlib==3.8.4
+ - matplotlib-inline==0.1.7
+ - mdurl==0.1.2
+ - mistune==3.0.2
+ - mpmath==1.3.0
+ - msgpack==1.0.8
+ - multidict==6.0.5
+ - multiprocess==0.70.16
+ - munkres==1.1.4
+ - nbclient==0.10.0
+ - nbconvert==7.16.4
+ - nbformat==5.10.4
+ - nest-asyncio==1.6.0
+ - networkx==3.3
+ - notebook-shim==0.2.4
+ - numba==0.59.1
+ - numpy==1.26.4
+ - nvidia-cublas-cu12==12.1.3.1
+ - nvidia-cuda-cupti-cu12==12.1.105
+ - nvidia-cuda-nvrtc-cu12==12.1.105
+ - nvidia-cuda-runtime-cu12==12.1.105
+ - nvidia-cudnn-cu12==8.9.2.26
+ - nvidia-cufft-cu12==11.0.2.54
+ - nvidia-curand-cu12==10.3.2.106
+ - nvidia-cusolver-cu12==11.4.5.107
+ - nvidia-cusparse-cu12==12.1.0.106
+ - nvidia-nccl-cu12==2.20.5
+ - nvidia-nvjitlink-cu12==12.4.127
+ - nvidia-nvtx-cu12==12.1.105
+ - openpyxl==3.1.2
+ - overrides==7.7.0
+ - packaging==24.0
+ - pandas==2.2.2
+ - pandocfilters==1.5.1
+ - panphon==0.20.0
+ - parler-tts==0.1
+ - parso==0.8.4
+ - pexpect==4.9.0
+ - pillow==10.3.0
+ - platformdirs==4.2.1
+ - pooch==1.8.1
+ - prometheus-client==0.20.0
+ - prompt-toolkit==3.0.43
+ - protobuf==3.19.6
+ - psutil==5.9.8
+ - ptyprocess==0.7.0
+ - pure-eval==0.2.2
+ - pyarrow==16.0.0
+ - pyarrow-hotfix==0.6
+ - pycparser==2.22
+ - pydantic==2.7.1
+ - pydantic-core==2.18.2
+ - pygments==2.18.0
+ - pyloudnorm==0.1.1
+ - pyparsing==3.1.2
+ - pystoi==0.4.1
+ - python-dateutil==2.9.0.post0
+ - python-engineio==4.9.0
+ - python-json-logger==2.0.7
+ - python-socketio==5.11.2
+ - pytz==2024.1
+ - pyyaml==6.0.1
+ - pyzmq==26.0.3
+ - randomname==0.2.1
+ - rapidfuzz==3.9.0
+ - referencing==0.35.1
+ - regex==2024.4.28
+ - requests==2.31.0
+ - rfc3339-validator==0.1.4
+ - rfc3986-validator==0.1.1
+ - rich==13.7.1
+ - rpds-py==0.18.1
+ - safetensors==0.4.3
+ - scikit-learn==1.4.2
+ - scipy==1.13.0
+ - send2trash==1.8.3
+ - sentencepiece==0.2.0
+ - sentry-sdk==2.1.1
+ - setproctitle==1.3.3
+ - simple-websocket==1.0.0
+ - six==1.16.0
+ - smmap==5.0.1
+ - sniffio==1.3.1
+ - soundfile==0.12.1
+ - soupsieve==2.5
+ - soxr==0.3.7
+ - stack-data==0.6.3
+ - sympy==1.12
+ - tensorboard==2.16.2
+ - tensorboard-data-server==0.7.2
+ - termcolor==2.4.0
+ - terminado==0.18.1
+ - text-unidecode==1.3
+ - threadpoolctl==3.5.0
+ - tinycss2==1.3.0
+ - tokenizers==0.19.1
+ - torch==2.3.0
+ - torch-stoi==0.2.1
+ - torchaudio==2.3.0
+ - tornado==6.4
+ - tqdm==4.66.4
+ - traitlets==5.14.3
+ - transformers==4.41.0.dev0
+ - triton==2.3.0
+ - types-python-dateutil==2.9.0.20240316
+ - typing-extensions==4.11.0
+ - tzdata==2024.1
+ - unicodecsv==0.14.1
+ - uri-template==1.3.0
+ - urllib3==2.2.1
+ - wandb==0.17.0
+ - wcwidth==0.2.13
+ - webcolors==1.13
+ - webencodings==0.5.1
+ - websocket-client==1.8.0
+ - werkzeug==3.0.3
+ - wsproto==1.2.0
+ - xxhash==3.4.1
+ - yarl==1.9.4
+ prefix: /home/sanchit/miniconda3/envs/venv
wandb/run-20240513_205248-d781ddha/files/config.yaml ADDED
@@ -0,0 +1,86 @@
+ wandb_version: 1
+
+ _wandb:
+ desc: null
+ value:
+ python_version: 3.11.9
+ cli_version: 0.17.0
+ framework: huggingface
+ huggingface_version: 4.41.0.dev0
+ is_jupyter_run: false
+ is_kaggle_kernel: false
+ start_time: 1715626368
+ t:
+ 1:
+ - 1
+ - 5
+ - 11
+ - 49
+ - 51
+ - 53
+ - 55
+ - 71
+ - 100
+ 2:
+ - 1
+ - 5
+ - 11
+ - 49
+ - 51
+ - 53
+ - 55
+ - 71
+ - 100
+ 3:
+ - 23
+ 4: 3.11.9
+ 5: 0.17.0
+ 6: 4.41.0.dev0
+ 8:
+ - 5
+ 13: linux-x86_64
+ learning_rate:
+ desc: null
+ value: 8.0e-05
+ model_name_or_path:
+ desc: null
+ value: parler-tts/parler_tts_mini_v0.1
+ num_train_epochs:
+ desc: null
+ value: 8.0
+ gradient_accumulation_steps:
+ desc: null
+ value: 8
+ per_device_train_batch_size:
+ desc: null
+ value: 16
+ global_batch_size:
+ desc: null
+ value: 16
+ mixed_precision:
+ desc: null
+ value: bf16
+ lr_scheduler_type:
+ desc: null
+ value: SchedulerType.COSINE
+ warmup_steps:
+ desc: null
+ value: 250
+ freeze_text_encoder:
+ desc: null
+ value: true
+ max_duration_in_seconds:
+ desc: null
+ value: 30.0
+ weight_decay:
+ desc: null
+ value: 0.01
+ adam_beta1:
+ desc: null
+ value: 0.9
+ adam_beta2:
+ desc: null
+ value: 0.99
+ temperature:
+ desc: null
+ value: 1.0
wandb/run-20240513_205248-d781ddha/files/output.log ADDED
@@ -0,0 +1,576 @@
+ 05/13/2024 20:52:51 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
+ 05/13/2024 20:52:51 - INFO - __main__ - Training/evaluation parameters ParlerTTSTrainingArguments(
+ _n_gpu=1,
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},
+ adafactor=False,
+ adam_beta1=0.9,
+ adam_beta2=0.99,
+ adam_epsilon=1e-08,
+ audio_encoder_per_device_batch_size=4,
+ auto_find_batch_size=False,
+ batch_eval_metrics=False,
+ bf16=False,
+ bf16_full_eval=False,
+ data_seed=None,
+ dataloader_drop_last=False,
+ dataloader_num_workers=4,
+ dataloader_persistent_workers=False,
+ dataloader_pin_memory=True,
+ dataloader_prefetch_factor=None,
+ ddp_backend=None,
+ ddp_broadcast_buffers=None,
+ ddp_bucket_cap_mb=None,
+ ddp_find_unused_parameters=None,
+ ddp_timeout=1800,
+ debug=[],
+ deepspeed=None,
+ disable_tqdm=False,
+ dispatch_batches=None,
+ do_eval=True,
+ do_predict=False,
+ do_train=True,
+ dtype=bfloat16,
+ eval_accumulation_steps=None,
+ eval_delay=0,
+ eval_do_concat_batches=True,
+ eval_steps=None,
+ eval_strategy=IntervalStrategy.EPOCH,
+ evaluation_strategy=epoch,
+ fp16=False,
+ fp16_backend=auto,
+ fp16_full_eval=False,
+ fp16_opt_level=O1,
+ fsdp=[],
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
+ fsdp_min_num_params=0,
+ fsdp_transformer_layer_cls_to_wrap=None,
+ full_determinism=False,
+ generation_config=None,
+ generation_max_length=None,
+ generation_num_beams=None,
+ gradient_accumulation_steps=8,
+ gradient_checkpointing=True,
+ gradient_checkpointing_kwargs=None,
+ greater_is_better=None,
+ group_by_length=True,
+ half_precision_backend=auto,
+ hub_always_push=False,
+ hub_model_id=None,
+ hub_private_repo=False,
+ hub_strategy=HubStrategy.EVERY_SAVE,
+ hub_token=<HUB_TOKEN>,
+ ignore_data_skip=False,
+ include_inputs_for_metrics=True,
+ include_num_input_tokens_seen=False,
+ include_tokens_per_second=False,
+ jit_mode_eval=False,
+ label_names=None,
+ label_smoothing_factor=0.0,
+ learning_rate=8e-05,
+ length_column_name=length,
+ load_best_model_at_end=False,
+ local_rank=0,
+ log_level=passive,
+ log_level_replica=warning,
+ log_on_each_node=True,
+ logging_dir=../output_dir_training_concat/runs/May13_20-52-47_hf-dgx-01,
+ logging_first_step=False,
+ logging_nan_inf_filter=True,
+ logging_steps=2,
+ logging_strategy=IntervalStrategy.STEPS,
+ lr_scheduler_kwargs={},
+ lr_scheduler_type=SchedulerType.COSINE,
+ max_grad_norm=1.0,
+ max_steps=-1,
+ metric_for_best_model=None,
+ mp_parameters=,
+ neftune_noise_alpha=None,
+ no_cuda=False,
+ num_train_epochs=8.0,
+ optim=OptimizerNames.ADAMW_TORCH,
+ optim_args=None,
+ optim_target_modules=None,
+ output_dir=../output_dir_training_concat/,
+ overwrite_output_dir=True,
+ past_index=-1,
+ per_device_eval_batch_size=16,
+ per_device_train_batch_size=16,
+ predict_with_generate=True,
+ prediction_loss_only=False,
+ push_to_hub=False,
+ push_to_hub_model_id=None,
+ push_to_hub_organization=None,
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
+ ray_scope=last,
+ remove_unused_columns=True,
+ report_to=['wandb'],
+ restore_callback_states_from_checkpoint=False,
+ resume_from_checkpoint=None,
+ run_name=../output_dir_training_concat/,
+ save_on_each_node=False,
+ save_only_model=False,
+ save_safetensors=True,
+ save_steps=72,
+ save_strategy=IntervalStrategy.STEPS,
+ save_total_limit=5,
+ seed=456,
+ skip_memory_metrics=True,
+ sortish_sampler=False,
+ split_batches=None,
+ tf32=None,
+ torch_compile=False,
+ torch_compile_backend=None,
+ torch_compile_mode=None,
+ torchdynamo=None,
+ tpu_metrics_debug=False,
+ tpu_num_cores=None,
+ use_cpu=False,
+ use_ipex=False,
+ use_legacy_prediction_loop=False,
+ use_mps_device=False,
+ warmup_ratio=0.0,
+ warmup_steps=250,
+ weight_decay=0.01,
+ )
+ 05/13/2024 20:52:53 - WARNING - __main__ - Disabling fast tokenizer warning: https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L3231-L3235
+ loading configuration file preprocessor_config.json from cache at /raid/.cache/huggingface/models--parler-tts--dac_44khZ_8kbps/snapshots/db52bea859d9411e0beb44a3ea923a8731ee4197/preprocessor_config.json
+ Feature extractor EncodecFeatureExtractor {
+ "chunk_length_s": null,
+ "feature_extractor_type": "EncodecFeatureExtractor",
+ "feature_size": 1,
+ "overlap": null,
+ "padding_side": "right",
+ "padding_value": 0.0,
+ "return_attention_mask": true,
+ "sampling_rate": 44100
+ }
+ loading file spiece.model from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/spiece.model
+ loading file tokenizer.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer.json
+ loading file added_tokens.json from cache at None
+ loading file special_tokens_map.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/special_tokens_map.json
+ loading file tokenizer_config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer_config.json
+ You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
+ loading file spiece.model from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/spiece.model
+ loading file tokenizer.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer.json
+ loading file added_tokens.json from cache at None
+ loading file special_tokens_map.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/special_tokens_map.json
+ loading file tokenizer_config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/tokenizer_config.json
+ Combining datasets...: 0%| | 0/1 [00:00<?, ?it/s]
+ Downloading readme: 100%|██████████████████████████████████████████████████████████████| 487/487 [00:00<00:00, 2.51MB/s]
+ Downloading readme: 0%| | 0.00/487 [00:00<?, ?B/s]
+ Downloading data: 0%| | 0.00/529k [00:00<?, ?B/s]
+ 05/13/2024 20:52:57 - WARNING - datasets.builder - Setting num_proc from 2 back to 1 for the train split to disable multiprocessing as it only contains one shard.
+ Setting num_proc from 2 back to 1 for the train split to disable multiprocessing as it only contains one shard.
+ Generating train split: 100%|████████████████████████████████████████████| 5347/5347 [00:00<00:00, 530764.98 examples/s]
+ Combining datasets...: 100%|██████████████████████████████████████████████████████████████| 1/1 [00:04<00:00, 4.79s/it]
+ Combining datasets...: 0%| | 0/1 [00:00<?, ?it/s]
+ 05/13/2024 20:52:59 - INFO - __main__ - Merging sanchit-gandhi/expresso-concatenated-half-normal - train with sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral - train
+ Combining datasets...: 100%|██████████████████████████████████████████████████████████████| 1/1 [00:03<00:00, 3.31s/it]
+ /home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
+ warnings.warn(
+ loading configuration file config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/config.json
+ Model config ParlerTTSConfig {
+ "_name_or_path": "/fsx/yoach/tmp/artefacts/training-400M-punctuated-v2/",
+ "architectures": [
+ "ParlerTTSForConditionalGeneration"
+ ],
+ "audio_encoder": {
+ "_name_or_path": "ylacombe/dac_44khZ_8kbps",
+ "add_cross_attention": false,
+ "architectures": [
+ "DACModel"
+ ],
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "chunk_size_feed_forward": 0,
+ "codebook_size": 1024,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": null,
+ "exponential_decay_length_penalty": null,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "frame_rate": 86,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "is_decoder": false,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "latent_dim": 1024,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "min_length": 0,
+ "model_bitrate": 8,
+ "model_type": "dac",
+ "no_repeat_ngram_size": 0,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_codebooks": 9,
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": null,
+ "prefix": null,
+ "problem_type": null,
+ "pruned_heads": {},
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "sampling_rate": 44100,
+ "sep_token_id": null,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": true,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": "float32",
+ "torchscript": false,
+ "typical_p": 1.0,
+ "use_bfloat16": false
+ },
+ "decoder": {
+ "_name_or_path": "/fsx/yoach/tmp/artefacts/decoder_400M/",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "add_cross_attention": true,
+ "architectures": [
+ "ParlerTTSForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": 1025,
+ "chunk_size_feed_forward": 0,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
+ "dropout": 0.1,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": 1024,
+ "exponential_decay_length_penalty": null,
+ "ffn_dim": 4096,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "initializer_factor": 0.02,
+ "is_decoder": true,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layerdrop": 0.0,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "max_position_embeddings": 4096,
+ "min_length": 0,
+ "model_type": "parler_tts_decoder",
+ "no_repeat_ngram_size": 0,
+ "num_attention_heads": 16,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_codebooks": 9,
+ "num_hidden_layers": 24,
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": 1024,
+ "prefix": null,
+ "problem_type": null,
+ "pruned_heads": {},
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "scale_embedding": false,
+ "sep_token_id": null,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": false,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": "float32",
+ "torchscript": false,
+ "typical_p": 1.0,
+ "use_bfloat16": false,
+ "use_cache": true,
+ "vocab_size": 1088
+ },
+ "decoder_start_token_id": 1025,
+ "is_encoder_decoder": true,
+ "model_type": "parler_tts",
+ "pad_token_id": 1024,
+ "text_encoder": {
+ "_name_or_path": "google/flan-t5-base",
+ "add_cross_attention": false,
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "chunk_size_feed_forward": 0,
+ "classifier_dropout": 0.0,
+ "cross_attention_hidden_size": null,
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 768,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "gelu_new",
+ "diversity_penalty": 0.0,
+ "do_sample": false,
+ "dropout_rate": 0.1,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": 1,
+ "exponential_decay_length_penalty": null,
+ "feed_forward_proj": "gated-gelu",
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "initializer_factor": 1.0,
+ "is_decoder": false,
+ "is_encoder_decoder": true,
+ "is_gated_act": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layer_norm_epsilon": 1e-06,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "min_length": 0,
+ "model_type": "t5",
+ "n_positions": 512,
+ "no_repeat_ngram_size": 0,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_decoder_layers": 12,
+ "num_heads": 12,
+ "num_layers": 12,
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_past": true,
+ "output_scores": false,
+ "pad_token_id": 0,
+ "prefix": null,
+ "problem_type": null,
+ "pruned_heads": {},
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "sep_token_id": null,
+ "suppress_tokens": null,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": false,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": null,
+ "torchscript": false,
+ "typical_p": 1.0,
+ "use_bfloat16": false,
+ "use_cache": true,
+ "vocab_size": 32128
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.0.dev0",
+ "vocab_size": 32128
+ }
+ loading weights file model.safetensors from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/model.safetensors
+ Generate config GenerationConfig {
+ "decoder_start_token_id": 1025,
+ "pad_token_id": 1024
+ }
+ /home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
+ warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
+ Generate config GenerationConfig {
+ "bos_token_id": 1025,
+ "eos_token_id": 1024,
+ "pad_token_id": 1024
+ }
+ All model checkpoint weights were used when initializing ParlerTTSForConditionalGeneration.
+ All the weights of ParlerTTSForConditionalGeneration were initialized from the model checkpoint at parler-tts/parler_tts_mini_v0.1.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use ParlerTTSForConditionalGeneration for predictions without further training.
+ loading configuration file generation_config.json from cache at /raid/.cache/huggingface/models--parler-tts--parler_tts_mini_v0.1/snapshots/e02fd18e77d38b49a85c7a9a85189a64b8472544/generation_config.json
+ Generate config GenerationConfig {
+ "bos_token_id": 1025,
+ "decoder_start_token_id": 1025,
+ "do_sample": true,
+ "eos_token_id": 1024,
+ "guidance_scale": 1.0,
+ "max_length": 2580,
+ "min_new_tokens": 50,
+ "pad_token_id": 1024
+ }
+ gathered_tensor tensor([0], device='cuda:0')
+
+ Filter (num_proc=2): 100%|██████████████████████████████████████████████████| 5347/5347 [00:05<00:00, 898.61 examples/s]
+
+ Filter (num_proc=2): 100%|█████████████████████████████████████████████████████████| 8/8 [00:06<00:00, 1.22 examples/s]
+
+ preprocess datasets (num_proc=2): 100%|█████████████████████████████████████| 5341/5341 [00:06<00:00, 766.49 examples/s]
+
+ preprocess datasets (num_proc=2): 50%|██████████████████████ | 4/8 [00:05<00:05, 1.27s/ examples]
+ preprocess datasets (num_proc=2): 100%|████████████████████████████████████████████| 8/8 [00:06<00:00, 1.30 examples/s]
+ 0%| | 0/1336 [00:00<?, ?it/s]/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/torch/nn/modules/conv.py:306: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)
+ return F.conv1d(input, weight, bias, self.stride,
+
+ 100%|██████████████████████████████████████████████████████████████████████████████▉| 1335/1336 [02:49<00:00, 7.90it/s]
+ Traceback (most recent call last):
+ File "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py", line 1763, in <module>
+ main()
+ File "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py", line 1153, in main
+ lab = [l[:, : int(ratio * length)] for (l, ratio, length) in zip(lab, rat, lens)]
+ ^^^^^^^^^^^^^^^^^^^
+ File "/home/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/torch/_tensor.py", line 1047, in __iter__
+ raise TypeError("iteration over a 0-d tensor")
+ TypeError: iteration over a 0-d tensor
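
Editor's note on the traceback above: `zip()` iterates each of its arguments, and PyTorch refuses to iterate a 0-dimensional tensor, so the run dies when `rat` or `lens` arrives as a scalar tensor rather than a batch of values. Below is a minimal sketch of the failure mode and one possible guard; the shapes and values are hypothetical and not taken from the training script's actual data.

```python
import torch

lab = torch.randn(1, 9, 100)  # hypothetical (batch, codebooks, frames) labels
rat = torch.tensor(0.5)       # 0-d scalar: zip() tries to iterate this and raises the TypeError above
lens = torch.tensor(100)      # 0-d scalar

# Promoting 0-d tensors to 1-d makes them iterable again:
rat, lens = torch.atleast_1d(rat), torch.atleast_1d(lens)

# The same comprehension as in the traceback now runs without error:
lab = [l[:, : int(ratio * length)] for (l, ratio, length) in zip(lab, rat, lens)]
print(lab[0].shape)  # torch.Size([9, 50])
```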
wandb/run-20240513_205248-d781ddha/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
+ Babel==2.15.0
+ Flask-Cors==4.0.1
+ Flask-RESTful==0.3.10
+ Flask-SocketIO==5.3.6
+ Flask==2.2.5
+ GitPython==3.1.43
+ Jinja2==3.1.4
+ Markdown==3.6
+ MarkupSafe==2.1.5
+ PyYAML==6.0.1
+ Pygments==2.18.0
+ Send2Trash==1.8.3
+ Werkzeug==3.0.3
+ absl-py==2.1.0
+ accelerate==0.30.0
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ aniso8601==9.0.1
+ annotated-types==0.6.0
+ anyio==4.3.0
+ argbind==0.3.7
+ argon2-cffi-bindings==21.2.0
+ argon2-cffi==23.1.0
+ arrow==1.3.0
+ asttokens==2.4.1
+ async-lru==2.0.4
+ attrs==23.2.0
+ audioread==3.0.1
+ beautifulsoup4==4.12.3
+ bidict==0.23.1
+ bitsandbytes==0.43.1
+ bleach==6.1.0
+ certifi==2024.2.2
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ click==8.1.7
+ coloredlogs==14.0
+ comm==0.2.2
+ contourpy==1.2.1
+ cycler==0.12.1
+ datasets==2.19.1
+ debugpy==1.8.1
+ decorator==5.1.1
+ defusedxml==0.7.1
+ descript-audio-codec==1.0.0
+ descript-audiotools==0.7.2
+ dill==0.3.8
+ dnspython==2.3.0
+ docker-pycreds==0.4.0
+ docstring_parser==0.16
+ editdistance==0.8.1
+ einops==0.8.0
+ et-xmlfile==1.1.0
+ evaluate==0.4.2
+ eventlet==0.36.1
+ executing==2.0.1
+ fastjsonschema==2.19.1
+ ffmpy==0.3.2
+ filelock==3.14.0
+ fire==0.6.0
+ flask-talisman==1.1.0
+ flatten-dict==0.4.2
+ fonttools==4.51.0
+ fqdn==1.5.1
+ frozenlist==1.4.1
+ fsspec==2024.3.1
+ future==1.0.0
+ g2p==2.0.0
+ gitdb==4.0.11
+ greenlet==3.0.3
+ grpcio==1.63.0
+ h11==0.14.0
+ httpcore==1.0.5
+ httpx==0.27.0
+ huggingface-hub==0.23.0
+ humanfriendly==10.0
+ idna==3.7
+ importlib_resources==6.4.0
+ ipdb==0.13.13
+ ipykernel==6.29.4
+ ipython==8.24.0
+ isoduration==20.11.0
+ itsdangerous==2.2.0
+ jedi==0.19.1
+ jiwer==3.0.4
+ joblib==1.4.2
+ json5==0.9.25
+ jsonpointer==2.4
+ jsonschema-specifications==2023.12.1
+ jsonschema==4.22.0
+ julius==0.2.7
+ jupyter-events==0.10.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.1
+ jupyter_core==5.7.2
+ jupyter_server==2.14.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.2.0
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.1
+ kiwisolver==1.4.5
+ lazy_loader==0.4
+ librosa==0.10.2
+ llvmlite==0.42.0
+ markdown-it-py==3.0.0
+ markdown2==2.4.13
+ matplotlib-inline==0.1.7
+ matplotlib==3.8.4
+ mdurl==0.1.2
+ mistune==3.0.2
+ mpmath==1.3.0
+ msgpack==1.0.8
+ multidict==6.0.5
+ multiprocess==0.70.16
+ munkres==1.1.4
+ nbclient==0.10.0
+ nbconvert==7.16.4
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.3
+ notebook_shim==0.2.4
+ numba==0.59.1
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.20.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.1.105
+ openpyxl==3.1.2
+ overrides==7.7.0
+ packaging==24.0
+ pandas==2.2.2
+ pandocfilters==1.5.1
+ panphon==0.20.0
+ parler_tts==0.1
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==10.3.0
+ pip==24.0
+ platformdirs==4.2.1
+ pooch==1.8.1
+ prometheus_client==0.20.0
+ prompt-toolkit==3.0.43
+ protobuf==3.19.6
+ psutil==5.9.8
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pyarrow-hotfix==0.6
+ pyarrow==16.0.0
+ pycparser==2.22
+ pydantic==2.7.1
+ pydantic_core==2.18.2
+ pyloudnorm==0.1.1
+ pyparsing==3.1.2
+ pystoi==0.4.1
+ python-dateutil==2.9.0.post0
+ python-engineio==4.9.0
+ python-json-logger==2.0.7
+ python-socketio==5.11.2
+ pytz==2024.1
+ pyzmq==26.0.3
+ randomname==0.2.1
+ rapidfuzz==3.9.0
+ referencing==0.35.1
+ regex==2024.4.28
+ requests==2.31.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==13.7.1
+ rpds-py==0.18.1
+ safetensors==0.4.3
+ scikit-learn==1.4.2
+ scipy==1.13.0
+ sentencepiece==0.2.0
+ sentry-sdk==2.1.1
+ setproctitle==1.3.3
+ setuptools==69.5.1
+ simple-websocket==1.0.0
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.1
+ soundfile==0.12.1
+ soupsieve==2.5
+ soxr==0.3.7
+ stack-data==0.6.3
+ sympy==1.12
+ tensorboard-data-server==0.7.2
+ tensorboard==2.16.2
+ termcolor==2.4.0
+ terminado==0.18.1
+ text-unidecode==1.3
+ threadpoolctl==3.5.0
+ tinycss2==1.3.0
+ tokenizers==0.19.1
+ torch-stoi==0.2.1
+ torch==2.3.0
+ torchaudio==2.3.0
+ tornado==6.4
+ tqdm==4.66.4
+ traitlets==5.14.3
+ transformers==4.41.0.dev0
+ transformers==4.41.0.dev0
+ triton==2.3.0
+ types-python-dateutil==2.9.0.20240316
+ typing_extensions==4.11.0
+ tzdata==2024.1
+ unicodecsv==0.14.1
+ uri-template==1.3.0
+ urllib3==2.2.1
+ wandb==0.17.0
+ wcwidth==0.2.13
+ webcolors==1.13
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ wheel==0.43.0
+ wsproto==1.2.0
+ xxhash==3.4.1
+ yarl==1.9.4
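
The pins above fully determine the run's Python environment. A minimal sketch, using only the standard library, of how a live environment could be checked against such name==version pins (check_pins is a hypothetical helper, not a file in this commit):

    from importlib.metadata import PackageNotFoundError, version

    def check_pins(pins):
        """Report packages whose installed version differs from a 'name==version' pin."""
        mismatches = []
        for pin in pins:
            name, _, wanted = pin.strip().partition("==")
            try:
                installed = version(name)
            except PackageNotFoundError:
                mismatches.append(f"{name}: not installed (wanted {wanted})")
                continue
            if installed != wanted:
                mismatches.append(f"{name}: {installed} != {wanted}")
        return mismatches

    # Example against two pins from the list above:
    print(check_pins(["torch==2.3.0", "transformers==4.41.0.dev0"]))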
wandb/run-20240513_205248-d781ddha/files/wandb-metadata.json ADDED
@@ -0,0 +1,804 @@
+ {
+ "os": "Linux-5.4.0-166-generic-x86_64-with-glibc2.31",
+ "python": "3.11.9",
+ "heartbeatAt": "2024-05-13T18:52:49.375376",
+ "startedAt": "2024-05-13T18:52:48.864820",
+ "docker": null,
+ "cuda": null,
+ "args": [
+ "--model_name_or_path",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--feature_extractor_name",
+ "parler-tts/dac_44khZ_8kbps",
+ "--description_tokenizer_name",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--prompt_tokenizer_name",
+ "parler-tts/parler_tts_mini_v0.1",
+ "--report_to",
+ "wandb",
+ "--overwrite_output_dir",
+ "true",
+ "--train_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal",
+ "--train_metadata_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral",
+ "--train_dataset_config_name",
+ "read",
+ "--train_split_name",
+ "train",
+ "--eval_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal",
+ "--eval_metadata_dataset_name",
+ "sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral",
+ "--eval_dataset_config_name",
+ "read",
+ "--eval_split_name",
+ "train",
+ "--max_eval_samples",
+ "8",
+ "--per_device_eval_batch_size",
+ "16",
+ "--target_audio_column_name",
+ "audio",
+ "--description_column_name",
+ "text_description",
+ "--prompt_column_name",
+ "text",
+ "--max_duration_in_seconds",
+ "30.0",
+ "--min_duration_in_seconds",
+ "2.0",
+ "--max_text_length",
+ "400",
+ "--preprocessing_num_workers",
+ "2",
+ "--do_train",
+ "true",
+ "--num_train_epochs",
+ "8",
+ "--gradient_accumulation_steps",
+ "8",
+ "--gradient_checkpointing",
+ "true",
+ "--per_device_train_batch_size",
+ "16",
+ "--learning_rate",
+ "0.00008",
+ "--adam_beta1",
+ "0.9",
+ "--adam_beta2",
+ "0.99",
+ "--weight_decay",
+ "0.01",
+ "--lr_scheduler_type",
+ "cosine",
+ "--warmup_steps",
+ "250",
+ "--logging_steps",
+ "2",
+ "--freeze_text_encoder",
+ "true",
+ "--audio_encoder_per_device_batch_size",
+ "4",
+ "--dtype",
+ "bfloat16",
+ "--seed",
+ "456",
+ "--output_dir",
+ "../output_dir_training_concat/",
+ "--temporary_save_to_disk",
+ "../audio_code_tmp_concat/",
+ "--save_to_disk",
+ "../tmp_dataset_audio_concat/",
+ "--dataloader_num_workers",
+ "4",
+ "--do_eval",
+ "--predict_with_generate",
+ "--include_inputs_for_metrics",
+ "--save_strategy",
+ "steps",
+ "--save_steps",
+ "72",
+ "--evaluation_strategy",
+ "epoch",
+ "--save_total_limit",
+ "5",
+ "--group_by_length",
+ "true"
+ ],
+ "state": "running",
+ "program": "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py",
+ "codePathLocal": "run_parler_tts_training.py",
+ "codePath": "run_parler_tts_training.py",
+ "git": {
+ "remote": "https://huggingface.co/sanchit-gandhi/parler-tts-mini-v0.1-expresso-concatenated",
+ "commit": "99f75adb5e13ee0ad87cce8deb3e71adc370b10e"
+ },
+ "email": "sanchit@huggingface.co",
+ "root": "/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated",
+ "host": "hf-dgx-01",
+ "username": "sanchit",
+ "executable": "/home/sanchit/miniconda3/envs/venv/bin/python",
+ "cpu_count": 64,
+ "cpu_count_logical": 128,
+ "cpu_freq": {
+ "current": 2187.8050625000005,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ "cpu_freq_per_core": [
+ {
+ "current": 1713.071,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1712.469,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2280.258,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1717.303,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3312.578,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3051.416,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1658.955,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.383,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3332.084,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.447,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.494,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1665.248,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.968,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.861,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.472,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1792.28,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3341.098,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1659.571,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2090.509,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3330.01,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1661.972,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1661.373,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2744.439,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3341.302,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1931.367,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.659,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3325.735,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.731,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1715.888,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1713.973,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2361.772,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1717.358,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1716.521,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2341.51,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1715.513,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1711.953,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.443,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1796.101,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.087,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.423,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1790.765,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1796.312,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1792.734,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.642,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1659.721,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1663.181,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3315.724,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1660.427,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2966.59,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1668.725,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3300.161,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1671.705,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1661.312,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3339.377,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3333.308,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1662.747,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1660.781,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1660.176,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1664.39,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3336.046,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.809,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.079,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1594.389,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1622.233,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2414.464,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2429.082,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2278.496,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2421.333,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3330.367,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2412.212,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3102.925,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2356.416,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3312.565,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3044.183,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3296.93,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3060.353,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2411.85,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2433.228,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2464.094,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2438.304,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3306.628,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3318.093,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2748.09,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3318.082,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3316.036,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3318.945,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3313.086,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3340.023,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3306.8,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2153.453,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2690.778,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2155.725,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2149.357,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2147.249,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2359.311,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2149.448,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2160.745,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2370.02,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2125.471,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2125.105,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1795.448,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1792.173,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1795.046,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1795.921,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.492,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.268,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1795.235,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1792.405,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1657.138,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1656.563,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3314.001,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1656.667,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 2138.701,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1655.678,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3312.257,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1653.093,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1656.024,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3297.182,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3303.429,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1657.178,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1670.781,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1655.864,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1655.356,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 3328.646,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1794.456,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1796.91,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1795.289,
+ "min": 1500.0,
+ "max": 2250.0
+ },
+ {
+ "current": 1793.508,
+ "min": 1500.0,
+ "max": 2250.0
+ }
+ ],
+ "disk": {
+ "/": {
+ "total": 1757.8785285949707,
+ "used": 1663.5053787231445
+ }
+ },
+ "gpu": "NVIDIA A100-SXM4-80GB",
+ "gpu_count": 5,
+ "gpu_devices": [
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memory_total": 85899345920
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memory_total": 85899345920
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memory_total": 85899345920
+ },
+ {
+ "name": "NVIDIA DGX Display",
+ "memory_total": 4294967296
+ },
+ {
+ "name": "NVIDIA A100-SXM4-80GB",
+ "memory_total": 85899345920
+ }
+ ],
+ "memory": {
+ "total": 503.5396919250488
+ }
+ }
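
The "args" array above is the exact argument vector passed to run_parler_tts_training.py. A minimal sketch of folding those flat pairs back into a flag-to-value mapping for inspection (args_to_dict is a hypothetical helper, not part of this commit; it assumes every flag starts with "--" and that value-less flags such as --do_eval are boolean switches):

    def args_to_dict(args):
        """Fold a flat ['--flag', 'value', ...] list into a dict; bare flags map to True."""
        out = {}
        i = 0
        while i < len(args):
            key = args[i].lstrip("-")
            if i + 1 < len(args) and not args[i + 1].startswith("--"):
                out[key] = args[i + 1]
                i += 2
            else:
                out[key] = True  # boolean switch, e.g. --do_eval
                i += 1
        return out

    # e.g. args_to_dict(["--learning_rate", "0.00008", "--do_eval"])
    # -> {"learning_rate": "0.00008", "do_eval": True}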
wandb/run-20240513_205248-d781ddha/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb": {"runtime": 215}}
wandb/run-20240513_205248-d781ddha/logs/debug-internal.log ADDED
@@ -0,0 +1,554 @@
1
+ 2024-05-13 20:52:48,872 INFO StreamThr :1257957 [internal.py:wandb_internal():85] W&B internal server running at pid: 1257957, started at: 2024-05-13 20:52:48.871624
2
+ 2024-05-13 20:52:48,874 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status
3
+ 2024-05-13 20:52:48,876 INFO WriterThread:1257957 [datastore.py:open_for_write():87] open: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/run-d781ddha.wandb
4
+ 2024-05-13 20:52:48,877 DEBUG SenderThread:1257957 [sender.py:send():378] send: header
5
+ 2024-05-13 20:52:48,877 DEBUG SenderThread:1257957 [sender.py:send():378] send: run
6
+ 2024-05-13 20:52:49,236 INFO SenderThread:1257957 [dir_watcher.py:__init__():211] watching files in: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files
7
+ 2024-05-13 20:52:49,236 INFO SenderThread:1257957 [sender.py:_start_run_threads():1123] run started: d781ddha with start time 1715626368.870296
8
+ 2024-05-13 20:52:49,241 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: check_version
9
+ 2024-05-13 20:52:49,241 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: check_version
10
+ 2024-05-13 20:52:49,306 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: run_start
11
+ 2024-05-13 20:52:49,337 DEBUG HandlerThread:1257957 [system_info.py:__init__():26] System info init
12
+ 2024-05-13 20:52:49,337 DEBUG HandlerThread:1257957 [system_info.py:__init__():41] System info init done
13
+ 2024-05-13 20:52:49,337 INFO HandlerThread:1257957 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-05-13 20:52:49,338 INFO SystemMonitor:1257957 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-05-13 20:52:49,338 INFO HandlerThread:1257957 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-05-13 20:52:49,338 INFO SystemMonitor:1257957 [interfaces.py:start():188] Started cpu monitoring
17
+ 2024-05-13 20:52:49,339 INFO SystemMonitor:1257957 [interfaces.py:start():188] Started disk monitoring
18
+ 2024-05-13 20:52:49,339 INFO SystemMonitor:1257957 [interfaces.py:start():188] Started gpu monitoring
19
+ 2024-05-13 20:52:49,340 INFO SystemMonitor:1257957 [interfaces.py:start():188] Started memory monitoring
20
+ 2024-05-13 20:52:49,341 INFO SystemMonitor:1257957 [interfaces.py:start():188] Started network monitoring
21
+ 2024-05-13 20:52:49,375 DEBUG HandlerThread:1257957 [system_info.py:probe():150] Probing system
22
+ 2024-05-13 20:52:49,377 DEBUG HandlerThread:1257957 [system_info.py:_probe_git():135] Probing git
23
+ 2024-05-13 20:52:49,382 DEBUG HandlerThread:1257957 [system_info.py:_probe_git():143] Probing git done
24
+ 2024-05-13 20:52:49,382 DEBUG HandlerThread:1257957 [system_info.py:probe():198] Probing system done
25
+ 2024-05-13 20:52:49,382 DEBUG HandlerThread:1257957 [system_monitor.py:probe():223] {'os': 'Linux-5.4.0-166-generic-x86_64-with-glibc2.31', 'python': '3.11.9', 'heartbeatAt': '2024-05-13T18:52:49.375376', 'startedAt': '2024-05-13T18:52:48.864820', 'docker': None, 'cuda': None, 'args': ('--model_name_or_path', 'parler-tts/parler_tts_mini_v0.1', '--feature_extractor_name', 'parler-tts/dac_44khZ_8kbps', '--description_tokenizer_name', 'parler-tts/parler_tts_mini_v0.1', '--prompt_tokenizer_name', 'parler-tts/parler_tts_mini_v0.1', '--report_to', 'wandb', '--overwrite_output_dir', 'true', '--train_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal', '--train_metadata_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral', '--train_dataset_config_name', 'read', '--train_split_name', 'train', '--eval_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal', '--eval_metadata_dataset_name', 'sanchit-gandhi/expresso-concatenated-half-normal-tags-mistral', '--eval_dataset_config_name', 'read', '--eval_split_name', 'train', '--max_eval_samples', '8', '--per_device_eval_batch_size', '16', '--target_audio_column_name', 'audio', '--description_column_name', 'text_description', '--prompt_column_name', 'text', '--max_duration_in_seconds', '30.0', '--min_duration_in_seconds', '2.0', '--max_text_length', '400', '--preprocessing_num_workers', '2', '--do_train', 'true', '--num_train_epochs', '8', '--gradient_accumulation_steps', '8', '--gradient_checkpointing', 'true', '--per_device_train_batch_size', '16', '--learning_rate', '0.00008', '--adam_beta1', '0.9', '--adam_beta2', '0.99', '--weight_decay', '0.01', '--lr_scheduler_type', 'cosine', '--warmup_steps', '250', '--logging_steps', '2', '--freeze_text_encoder', 'true', '--audio_encoder_per_device_batch_size', '4', '--dtype', 'bfloat16', '--seed', '456', '--output_dir', '../output_dir_training_concat/', '--temporary_save_to_disk', '../audio_code_tmp_concat/', '--save_to_disk', '../tmp_dataset_audio_concat/', '--dataloader_num_workers', '4', '--do_eval', '--predict_with_generate', '--include_inputs_for_metrics', '--save_strategy', 'steps', '--save_steps', '72', '--evaluation_strategy', 'epoch', '--save_total_limit', '5', '--group_by_length', 'true'), 'state': 'running', 'program': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/./run_parler_tts_training.py', 'codePathLocal': 'run_parler_tts_training.py', 'codePath': 'run_parler_tts_training.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/parler-tts-mini-v0.1-expresso-concatenated', 'commit': '99f75adb5e13ee0ad87cce8deb3e71adc370b10e'}, 'email': 'sanchit@huggingface.co', 'root': '/raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated', 'host': 'hf-dgx-01', 'username': 'sanchit', 'executable': '/home/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 64, 'cpu_count_logical': 128, 'cpu_freq': {'current': 2187.8050625000005, 'min': 1500.0, 'max': 2250.0}, 'cpu_freq_per_core': [{'current': 1713.071, 'min': 1500.0, 'max': 2250.0}, {'current': 1712.469, 'min': 1500.0, 'max': 2250.0}, {'current': 2280.258, 'min': 1500.0, 'max': 2250.0}, {'current': 1717.303, 'min': 1500.0, 'max': 2250.0}, {'current': 3312.578, 'min': 1500.0, 'max': 2250.0}, {'current': 3051.416, 'min': 1500.0, 'max': 2250.0}, {'current': 1658.955, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.383, 'min': 1500.0, 'max': 2250.0}, {'current': 3332.084, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.447, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.494, 'min': 1500.0, 
'max': 2250.0}, {'current': 1665.248, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.968, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.861, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.472, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.28, 'min': 1500.0, 'max': 2250.0}, {'current': 3341.098, 'min': 1500.0, 'max': 2250.0}, {'current': 1659.571, 'min': 1500.0, 'max': 2250.0}, {'current': 2090.509, 'min': 1500.0, 'max': 2250.0}, {'current': 3330.01, 'min': 1500.0, 'max': 2250.0}, {'current': 1661.972, 'min': 1500.0, 'max': 2250.0}, {'current': 1661.373, 'min': 1500.0, 'max': 2250.0}, {'current': 2744.439, 'min': 1500.0, 'max': 2250.0}, {'current': 3341.302, 'min': 1500.0, 'max': 2250.0}, {'current': 1931.367, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.659, 'min': 1500.0, 'max': 2250.0}, {'current': 3325.735, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.731, 'min': 1500.0, 'max': 2250.0}, {'current': 1715.888, 'min': 1500.0, 'max': 2250.0}, {'current': 1713.973, 'min': 1500.0, 'max': 2250.0}, {'current': 2361.772, 'min': 1500.0, 'max': 2250.0}, {'current': 1717.358, 'min': 1500.0, 'max': 2250.0}, {'current': 1716.521, 'min': 1500.0, 'max': 2250.0}, {'current': 2341.51, 'min': 1500.0, 'max': 2250.0}, {'current': 1715.513, 'min': 1500.0, 'max': 2250.0}, {'current': 1711.953, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.443, 'min': 1500.0, 'max': 2250.0}, {'current': 1796.101, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.087, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.423, 'min': 1500.0, 'max': 2250.0}, {'current': 1790.765, 'min': 1500.0, 'max': 2250.0}, {'current': 1796.312, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.734, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.642, 'min': 1500.0, 'max': 2250.0}, {'current': 1659.721, 'min': 1500.0, 'max': 2250.0}, {'current': 1663.181, 'min': 1500.0, 'max': 2250.0}, {'current': 3315.724, 'min': 1500.0, 'max': 2250.0}, {'current': 1660.427, 'min': 1500.0, 'max': 2250.0}, {'current': 2966.59, 'min': 1500.0, 'max': 2250.0}, {'current': 1668.725, 'min': 1500.0, 'max': 2250.0}, {'current': 3300.161, 'min': 1500.0, 'max': 2250.0}, {'current': 1671.705, 'min': 1500.0, 'max': 2250.0}, {'current': 1661.312, 'min': 1500.0, 'max': 2250.0}, {'current': 3339.377, 'min': 1500.0, 'max': 2250.0}, {'current': 3333.308, 'min': 1500.0, 'max': 2250.0}, {'current': 1662.747, 'min': 1500.0, 'max': 2250.0}, {'current': 1660.781, 'min': 1500.0, 'max': 2250.0}, {'current': 1660.176, 'min': 1500.0, 'max': 2250.0}, {'current': 1664.39, 'min': 1500.0, 'max': 2250.0}, {'current': 3336.046, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.809, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.079, 'min': 1500.0, 'max': 2250.0}, {'current': 1594.389, 'min': 1500.0, 'max': 2250.0}, {'current': 1622.233, 'min': 1500.0, 'max': 2250.0}, {'current': 2414.464, 'min': 1500.0, 'max': 2250.0}, {'current': 2429.082, 'min': 1500.0, 'max': 2250.0}, {'current': 2278.496, 'min': 1500.0, 'max': 2250.0}, {'current': 2421.333, 'min': 1500.0, 'max': 2250.0}, {'current': 3330.367, 'min': 1500.0, 'max': 2250.0}, {'current': 2412.212, 'min': 1500.0, 'max': 2250.0}, {'current': 3102.925, 'min': 1500.0, 'max': 2250.0}, {'current': 2356.416, 'min': 1500.0, 'max': 2250.0}, {'current': 3312.565, 'min': 1500.0, 'max': 2250.0}, {'current': 3044.183, 'min': 1500.0, 'max': 2250.0}, {'current': 3296.93, 'min': 1500.0, 'max': 2250.0}, {'current': 3060.353, 'min': 1500.0, 'max': 2250.0}, {'current': 2411.85, 'min': 1500.0, 'max': 2250.0}, {'current': 2433.228, 'min': 1500.0, 'max': 
2250.0}, {'current': 2464.094, 'min': 1500.0, 'max': 2250.0}, {'current': 2438.304, 'min': 1500.0, 'max': 2250.0}, {'current': 3306.628, 'min': 1500.0, 'max': 2250.0}, {'current': 3318.093, 'min': 1500.0, 'max': 2250.0}, {'current': 2748.09, 'min': 1500.0, 'max': 2250.0}, {'current': 3318.082, 'min': 1500.0, 'max': 2250.0}, {'current': 3316.036, 'min': 1500.0, 'max': 2250.0}, {'current': 3318.945, 'min': 1500.0, 'max': 2250.0}, {'current': 3313.086, 'min': 1500.0, 'max': 2250.0}, {'current': 3340.023, 'min': 1500.0, 'max': 2250.0}, {'current': 3306.8, 'min': 1500.0, 'max': 2250.0}, {'current': 2153.453, 'min': 1500.0, 'max': 2250.0}, {'current': 2690.778, 'min': 1500.0, 'max': 2250.0}, {'current': 2155.725, 'min': 1500.0, 'max': 2250.0}, {'current': 2149.357, 'min': 1500.0, 'max': 2250.0}, {'current': 2147.249, 'min': 1500.0, 'max': 2250.0}, {'current': 2359.311, 'min': 1500.0, 'max': 2250.0}, {'current': 2149.448, 'min': 1500.0, 'max': 2250.0}, {'current': 2160.745, 'min': 1500.0, 'max': 2250.0}, {'current': 2370.02, 'min': 1500.0, 'max': 2250.0}, {'current': 2125.471, 'min': 1500.0, 'max': 2250.0}, {'current': 2125.105, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.448, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.173, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.046, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.921, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.492, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.268, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.235, 'min': 1500.0, 'max': 2250.0}, {'current': 1792.405, 'min': 1500.0, 'max': 2250.0}, {'current': 1657.138, 'min': 1500.0, 'max': 2250.0}, {'current': 1656.563, 'min': 1500.0, 'max': 2250.0}, {'current': 3314.001, 'min': 1500.0, 'max': 2250.0}, {'current': 1656.667, 'min': 1500.0, 'max': 2250.0}, {'current': 2138.701, 'min': 1500.0, 'max': 2250.0}, {'current': 1655.678, 'min': 1500.0, 'max': 2250.0}, {'current': 3312.257, 'min': 1500.0, 'max': 2250.0}, {'current': 1653.093, 'min': 1500.0, 'max': 2250.0}, {'current': 1656.024, 'min': 1500.0, 'max': 2250.0}, {'current': 3297.182, 'min': 1500.0, 'max': 2250.0}, {'current': 3303.429, 'min': 1500.0, 'max': 2250.0}, {'current': 1657.178, 'min': 1500.0, 'max': 2250.0}, {'current': 1670.781, 'min': 1500.0, 'max': 2250.0}, {'current': 1655.864, 'min': 1500.0, 'max': 2250.0}, {'current': 1655.356, 'min': 1500.0, 'max': 2250.0}, {'current': 3328.646, 'min': 1500.0, 'max': 2250.0}, {'current': 1794.456, 'min': 1500.0, 'max': 2250.0}, {'current': 1796.91, 'min': 1500.0, 'max': 2250.0}, {'current': 1795.289, 'min': 1500.0, 'max': 2250.0}, {'current': 1793.508, 'min': 1500.0, 'max': 2250.0}], 'disk': {'/': {'total': 1757.8785285949707, 'used': 1663.5053787231445}}, 'gpu': 'NVIDIA A100-SXM4-80GB', 'gpu_count': 5, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}, {'name': 'NVIDIA DGX Display', 'memory_total': 4294967296}, {'name': 'NVIDIA A100-SXM4-80GB', 'memory_total': 85899345920}], 'memory': {'total': 503.5396919250488}}
26
+ 2024-05-13 20:52:49,382 INFO HandlerThread:1257957 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-05-13 20:52:49,382 INFO HandlerThread:1257957 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-05-13 20:52:49,382 DEBUG HandlerThread:1257957 [system_info.py:_save_conda():207] Saving list of conda packages installed into the current environment
29
+ 2024-05-13 20:52:49,396 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
30
+ 2024-05-13 20:52:49,406 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
31
+ 2024-05-13 20:52:50,237 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml
32
+ 2024-05-13 20:52:51,425 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
33
+ 2024-05-13 20:52:51,442 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
34
+ 2024-05-13 20:52:51,753 DEBUG HandlerThread:1257957 [system_info.py:_save_conda():222] Saving conda packages done
35
+ 2024-05-13 20:52:51,754 INFO HandlerThread:1257957 [system_monitor.py:probe():229] Finished publishing system info
36
+ 2024-05-13 20:52:51,762 DEBUG SenderThread:1257957 [sender.py:send():378] send: files
37
+ 2024-05-13 20:52:51,762 INFO SenderThread:1257957 [sender.py:_save_file():1389] saving file wandb-metadata.json with policy now
38
+ 2024-05-13 20:52:51,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: python_packages
39
+ 2024-05-13 20:52:51,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
40
+ 2024-05-13 20:52:51,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: python_packages
41
+ 2024-05-13 20:52:51,879 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
42
+ 2024-05-13 20:52:52,070 DEBUG SenderThread:1257957 [sender.py:send():378] send: telemetry
43
+ 2024-05-13 20:52:52,070 DEBUG SenderThread:1257957 [sender.py:send():378] send: config
44
+ 2024-05-13 20:52:52,237 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml
45
+ 2024-05-13 20:52:52,237 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/requirements.txt
46
+ 2024-05-13 20:52:52,237 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-metadata.json
47
+ 2024-05-13 20:52:52,237 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
48
+ 2024-05-13 20:52:52,289 INFO wandb-upload_0:1257957 [upload_job.py:push():130] Uploaded file /tmp/tmpapx4bmqmwandb/jir2ww2a-wandb-metadata.json
49
+ 2024-05-13 20:52:54,154 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
50
+ 2024-05-13 20:52:54,238 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
51
+ 2024-05-13 20:52:54,341 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
52
+ 2024-05-13 20:52:54,362 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
53
+ 2024-05-13 20:52:56,238 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
54
+ 2024-05-13 20:52:56,402 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
55
+ 2024-05-13 20:52:56,412 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
56
+ 2024-05-13 20:52:58,239 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
57
+ 2024-05-13 20:52:59,391 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
58
+ 2024-05-13 20:52:59,413 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
59
+ 2024-05-13 20:52:59,600 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
60
+ 2024-05-13 20:53:00,240 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
61
+ 2024-05-13 20:53:01,439 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
62
+ 2024-05-13 20:53:01,466 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
63
+ 2024-05-13 20:53:02,240 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
64
+ 2024-05-13 20:53:04,240 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
65
+ 2024-05-13 20:53:04,527 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
66
+ 2024-05-13 20:53:04,551 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
67
+ 2024-05-13 20:53:04,728 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
68
+ 2024-05-13 20:53:06,241 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
69
+ 2024-05-13 20:53:06,618 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
70
+ 2024-05-13 20:53:06,629 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
71
+ 2024-05-13 20:53:06,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
72
+ 2024-05-13 20:53:06,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
73
+ 2024-05-13 20:53:09,448 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
74
+ 2024-05-13 20:53:09,475 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
75
+ 2024-05-13 20:53:09,745 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
76
+ 2024-05-13 20:53:10,242 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
77
+ 2024-05-13 20:53:11,529 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
78
+ 2024-05-13 20:53:11,551 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
79
+ 2024-05-13 20:53:12,242 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
80
+ 2024-05-13 20:53:14,349 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
81
+ 2024-05-13 20:53:14,382 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
82
+ 2024-05-13 20:53:15,697 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
83
+ 2024-05-13 20:53:16,244 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
84
+ 2024-05-13 20:53:16,452 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
85
+ 2024-05-13 20:53:16,465 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
86
+ 2024-05-13 20:53:18,244 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
87
+ 2024-05-13 20:53:19,399 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
88
+ 2024-05-13 20:53:19,429 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
89
+ 2024-05-13 20:53:20,953 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
90
+ 2024-05-13 20:53:21,245 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/config.yaml
91
+ 2024-05-13 20:53:21,469 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
92
+ 2024-05-13 20:53:21,488 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
93
+ 2024-05-13 20:53:21,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
94
+ 2024-05-13 20:53:21,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
95
+ 2024-05-13 20:53:22,246 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
96
+ 2024-05-13 20:53:24,246 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
97
+ 2024-05-13 20:53:24,383 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
98
+ 2024-05-13 20:53:24,407 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
99
+ 2024-05-13 20:53:26,755 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
100
+ 2024-05-13 20:53:28,202 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
101
+ 2024-05-13 20:53:28,227 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
102
+ 2024-05-13 20:53:28,242 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
103
+ 2024-05-13 20:53:30,248 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
104
+ 2024-05-13 20:53:31,218 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
105
+ 2024-05-13 20:53:31,240 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
106
+ 2024-05-13 20:53:31,787 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
107
+ 2024-05-13 20:53:32,249 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
108
+ 2024-05-13 20:53:33,272 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
109
+ 2024-05-13 20:53:33,297 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
110
+ 2024-05-13 20:53:36,218 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
111
+ 2024-05-13 20:53:36,250 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
112
+ 2024-05-13 20:53:36,253 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
113
+ 2024-05-13 20:53:36,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
114
+ 2024-05-13 20:53:36,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
115
+ 2024-05-13 20:53:37,092 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
116
+ 2024-05-13 20:53:38,250 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
117
+ 2024-05-13 20:53:38,303 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
118
+ 2024-05-13 20:53:38,313 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
119
+ 2024-05-13 20:53:40,251 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
120
+ 2024-05-13 20:53:41,370 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
121
+ 2024-05-13 20:53:41,397 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
122
+ 2024-05-13 20:53:42,166 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
123
+ 2024-05-13 20:53:42,252 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
124
+ 2024-05-13 20:53:43,423 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
125
+ 2024-05-13 20:53:43,441 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
126
+ 2024-05-13 20:53:44,252 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
127
+ 2024-05-13 20:53:46,253 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
128
+ 2024-05-13 20:53:46,761 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
129
+ 2024-05-13 20:53:46,792 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
130
+ 2024-05-13 20:53:47,210 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
131
+ 2024-05-13 20:53:48,254 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
132
+ 2024-05-13 20:53:48,869 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
133
+ 2024-05-13 20:53:48,922 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
134
+ 2024-05-13 20:53:49,341 DEBUG SystemMonitor:1257957 [system_monitor.py:_start():172] Starting system metrics aggregation loop
135
+ 2024-05-13 20:53:49,346 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
136
+ 2024-05-13 20:53:50,254 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
137
+ 2024-05-13 20:53:50,950 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
138
+ 2024-05-13 20:53:50,971 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
139
+ 2024-05-13 20:53:51,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
140
+ 2024-05-13 20:53:51,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
141
+ 2024-05-13 20:53:52,255 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
142
+ 2024-05-13 20:53:52,329 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
143
+ 2024-05-13 20:53:53,011 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
144
+ 2024-05-13 20:53:53,045 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
145
+ 2024-05-13 20:53:54,255 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
146
+ 2024-05-13 20:53:56,224 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
147
+ 2024-05-13 20:53:56,235 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
148
+ 2024-05-13 20:53:56,256 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
149
+ 2024-05-13 20:53:57,574 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
150
+ 2024-05-13 20:53:58,257 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
151
+ 2024-05-13 20:53:58,298 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
152
+ 2024-05-13 20:53:58,323 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
153
+ 2024-05-13 20:54:00,257 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
154
+ 2024-05-13 20:54:00,372 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
155
+ 2024-05-13 20:54:00,381 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
156
+ 2024-05-13 20:54:02,257 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
157
+ 2024-05-13 20:54:02,473 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
158
+ 2024-05-13 20:54:02,486 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
159
+ 2024-05-13 20:54:02,652 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
160
+ 2024-05-13 20:54:04,260 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
161
+ 2024-05-13 20:54:06,260 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
162
+ 2024-05-13 20:54:06,527 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
163
+ 2024-05-13 20:54:06,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
164
+ 2024-05-13 20:54:06,875 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
165
+ 2024-05-13 20:54:07,729 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
166
+ 2024-05-13 20:54:07,954 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
167
+ 2024-05-13 20:54:07,968 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
168
+ 2024-05-13 20:54:08,261 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
169
+ 2024-05-13 20:54:09,996 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
170
+ 2024-05-13 20:54:10,009 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
171
+ 2024-05-13 20:54:10,262 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
172
+ 2024-05-13 20:54:12,063 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
173
+ 2024-05-13 20:54:12,083 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
174
+ 2024-05-13 20:54:12,262 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
175
+ 2024-05-13 20:54:12,769 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
176
+ 2024-05-13 20:54:14,119 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
177
+ 2024-05-13 20:54:14,133 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
178
+ 2024-05-13 20:54:14,263 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
179
+ 2024-05-13 20:54:16,172 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
180
+ 2024-05-13 20:54:16,196 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
181
+ 2024-05-13 20:54:16,263 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
182
+ 2024-05-13 20:54:17,810 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
183
+ 2024-05-13 20:54:18,232 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
184
+ 2024-05-13 20:54:18,250 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
185
+ 2024-05-13 20:54:18,264 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:19,348 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:54:20,264 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:20,272 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:20,282 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:21,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:54:21,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:54:22,266 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:22,303 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:22,313 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:22,858 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:24,266 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:24,334 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:24,349 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:26,267 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:26,379 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:26,401 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:27,908 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:28,267 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:28,422 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:28,440 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:30,268 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:30,472 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:30,484 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:32,269 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:32,538 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:32,552 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:33,018 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:34,269 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:34,686 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:34,726 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:36,269 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:36,860 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:36,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:54:36,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:54:36,880 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:38,057 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:38,270 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:40,270 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:40,368 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:54:41,242 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:41,259 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:42,271 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:43,097 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:43,484 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:43,506 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:44,272 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:45,567 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:45,583 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:46,272 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:47,618 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:47,632 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:48,273 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:48,308 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:49,350 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:54:49,662 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:49,677 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:50,273 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:51,727 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:51,748 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:51,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:54:51,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:54:52,274 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:53,618 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:54:53,953 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:53,975 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:54,275 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:56,117 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:56,151 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:56,275 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:58,276 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:54:58,370 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:58,407 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:54:58,644 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:00,276 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:00,559 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:00,601 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:02,277 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:02,776 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:02,807 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:03,686 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:04,277 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:04,969 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:05,009 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:06,278 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:06,875 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:55:06,875 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:55:07,138 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:07,162 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:08,278 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:08,783 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:09,293 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:09,336 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:10,279 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:11,446 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:11,486 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:12,280 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:13,807 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:14,280 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:15,679 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:55:15,736 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:15,758 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:16,281 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:17,815 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:17,846 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:18,281 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:18,856 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:19,353 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:55:19,930 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:19,956 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:20,282 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:21,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:55:21,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:55:22,142 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:22,170 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:22,282 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:23,933 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:24,253 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:24,274 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:24,283 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:26,283 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:26,332 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:26,343 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:28,285 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:28,419 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:28,451 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:29,032 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:30,285 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:30,624 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:30,647 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:32,286 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:32,797 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:32,841 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:34,229 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:34,286 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:35,011 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:35,035 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:36,287 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:36,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:55:36,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:55:37,215 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:37,236 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:38,287 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:39,254 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:39,435 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:39,455 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:40,288 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:41,535 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:41,547 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:42,289 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:43,627 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:43,638 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:44,279 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:44,289 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:45,711 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:45,770 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:46,290 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:48,290 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:49,355 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:55:49,356 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:50,017 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:55:50,063 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:50,091 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:50,291 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:51,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:55:51,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:55:52,211 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:52,242 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:52,291 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:54,292 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:54,379 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:54,387 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:55:54,419 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:56,292 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:56,566 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:56,604 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:58,293 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:55:58,654 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:58,674 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:55:59,594 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:00,294 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:00,708 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:00,723 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:02,294 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:02,766 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:02,781 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:04,294 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:04,633 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:04,818 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:04,829 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:06,295 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:06,853 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:06,862 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:06,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:56:06,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:56:08,295 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:08,898 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:08,916 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:09,783 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:10,296 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:10,950 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:11,697 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:12,297 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:14,054 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:14,064 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:14,297 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:14,910 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:16,101 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:16,114 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:16,298 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:18,143 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:18,154 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:18,298 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:19,357 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:56:19,989 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:20,194 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:20,212 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:20,299 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:21,876 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: stop_status
+ 2024-05-13 20:56:21,876 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: stop_status
+ 2024-05-13 20:56:22,299 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:24,300 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:24,913 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:56:25,185 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:25,189 DEBUG SenderThread:1257957 [sender.py:send():378] send: exit
+ 2024-05-13 20:56:25,189 INFO SenderThread:1257957 [sender.py:send_exit():585] handling exit code: 1
+ 2024-05-13 20:56:25,189 INFO SenderThread:1257957 [sender.py:send_exit():587] handling runtime: 215
+ 2024-05-13 20:56:25,189 INFO SenderThread:1257957 [sender.py:_save_file():1389] saving file wandb-summary.json with policy end
+ 2024-05-13 20:56:25,189 INFO SenderThread:1257957 [sender.py:send_exit():593] send defer
+ 2024-05-13 20:56:25,189 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:25,189 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 0
+ 2024-05-13 20:56:25,190 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:25,190 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 0
+ 2024-05-13 20:56:25,190 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 1
+ 2024-05-13 20:56:25,190 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:25,190 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 1
+ 2024-05-13 20:56:25,190 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:25,190 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 1
+ 2024-05-13 20:56:25,190 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 2
+ 2024-05-13 20:56:25,190 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:25,190 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 2
+ 2024-05-13 20:56:25,190 INFO HandlerThread:1257957 [system_monitor.py:finish():203] Stopping system monitor
+ 2024-05-13 20:56:25,190 DEBUG SystemMonitor:1257957 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+ 2024-05-13 20:56:25,190 DEBUG SystemMonitor:1257957 [system_monitor.py:_start():183] Publishing last batch of metrics
+ 2024-05-13 20:56:25,193 INFO HandlerThread:1257957 [interfaces.py:finish():200] Joined cpu monitor
+ 2024-05-13 20:56:25,193 INFO HandlerThread:1257957 [interfaces.py:finish():200] Joined disk monitor
+ 2024-05-13 20:56:25,231 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:25,252 ERROR gpu :1257957 [interfaces.py:monitor():142] Failed to sample metric: Not Supported
+ 2024-05-13 20:56:25,300 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_created():271] file/dir created: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-summary.json
+ 2024-05-13 20:56:26,300 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:26,956 ERROR gpu :1257957 [interfaces.py:aggregate():159] Failed to serialize metric: division by zero
+ 2024-05-13 20:56:26,956 INFO HandlerThread:1257957 [interfaces.py:finish():200] Joined gpu monitor
+ 2024-05-13 20:56:26,956 INFO HandlerThread:1257957 [interfaces.py:finish():200] Joined memory monitor
+ 2024-05-13 20:56:26,956 INFO HandlerThread:1257957 [interfaces.py:finish():200] Joined network monitor
+ 2024-05-13 20:56:26,956 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:56:26,958 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,958 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 2
+ 2024-05-13 20:56:26,958 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 3
+ 2024-05-13 20:56:26,958 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:26,958 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 3
+ 2024-05-13 20:56:26,959 DEBUG SenderThread:1257957 [sender.py:send():378] send: stats
+ 2024-05-13 20:56:26,959 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:56:26,960 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,960 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 3
+ 2024-05-13 20:56:26,960 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 4
+ 2024-05-13 20:56:26,960 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:26,960 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 4
+ 2024-05-13 20:56:26,960 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,960 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 4
+ 2024-05-13 20:56:26,960 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 5
+ 2024-05-13 20:56:26,960 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:26,960 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 5
+ 2024-05-13 20:56:26,961 DEBUG SenderThread:1257957 [sender.py:send():378] send: summary
+ 2024-05-13 20:56:26,961 INFO SenderThread:1257957 [sender.py:_save_file():1389] saving file wandb-summary.json with policy end
+ 2024-05-13 20:56:26,961 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,961 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 5
+ 2024-05-13 20:56:26,961 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 6
+ 2024-05-13 20:56:26,961 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:26,961 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 6
+ 2024-05-13 20:56:26,961 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,961 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 6
+ 2024-05-13 20:56:26,961 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 7
+ 2024-05-13 20:56:26,962 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: status_report
+ 2024-05-13 20:56:26,962 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:26,962 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 7
+ 2024-05-13 20:56:26,962 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:26,962 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 7
+ 2024-05-13 20:56:27,189 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:56:27,301 INFO Thread-12 :1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-summary.json
+ 2024-05-13 20:56:28,505 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 8
+ 2024-05-13 20:56:28,506 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:56:28,506 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:28,506 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 8
+ 2024-05-13 20:56:28,506 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:28,506 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 8
+ 2024-05-13 20:56:28,506 INFO SenderThread:1257957 [job_builder.py:build():432] Attempting to build job artifact
+ 2024-05-13 20:56:28,507 INFO SenderThread:1257957 [job_builder.py:_get_source_type():565] is repo sourced job
+ 2024-05-13 20:56:28,530 INFO SenderThread:1257957 [job_builder.py:build():541] adding wandb-job metadata file
+ 2024-05-13 20:56:28,532 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 9
+ 2024-05-13 20:56:28,532 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:28,532 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 9
+ 2024-05-13 20:56:28,533 DEBUG SenderThread:1257957 [sender.py:send():378] send: artifact
+ 2024-05-13 20:56:29,160 INFO SenderThread:1257957 [sender.py:send_artifact():1467] sent artifact job-https___huggingface.co_sanchit-gandhi_parler-tts-mini-v0.1-expresso-concatenated_run_parler_tts_training.py - {'id': 'QXJ0aWZhY3Q6ODM0NzI5Njgw', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjE3NDIzMTI1Mw==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6ODM0NzI5Njgw', 'versionIndex': 0}}}
+ 2024-05-13 20:56:29,160 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:29,160 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 9
+ 2024-05-13 20:56:29,160 INFO SenderThread:1257957 [dir_watcher.py:finish():358] shutting down directory watcher
+ 2024-05-13 20:56:29,191 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:56:29,302 INFO SenderThread:1257957 [dir_watcher.py:_on_file_modified():288] file/dir modified: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():388] scan: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml conda-environment.yaml
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-summary.json wandb-summary.json
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log output.log
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/requirements.txt requirements.txt
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/config.yaml config.yaml
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [dir_watcher.py:finish():402] scan save: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-metadata.json wandb-metadata.json
+ 2024-05-13 20:56:29,303 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 10
+ 2024-05-13 20:56:29,303 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:56:29,304 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:29,304 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 10
+ 2024-05-13 20:56:29,310 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:29,310 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 10
+ 2024-05-13 20:56:29,310 INFO SenderThread:1257957 [file_pusher.py:finish():169] shutting down file pusher
+ 2024-05-13 20:56:29,628 INFO wandb-upload_1:1257957 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/wandb-summary.json
+ 2024-05-13 20:56:29,688 INFO wandb-upload_0:1257957 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/conda-environment.yaml
+ 2024-05-13 20:56:29,743 INFO wandb-upload_4:1257957 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/config.yaml
+ 2024-05-13 20:56:29,770 INFO wandb-upload_3:1257957 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/requirements.txt
+ 2024-05-13 20:56:29,817 INFO wandb-upload_2:1257957 [upload_job.py:push():130] Uploaded file /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/files/output.log
+ 2024-05-13 20:56:30,017 INFO Thread-11 (_thread_body):1257957 [sender.py:transition_state():613] send defer: 11
+ 2024-05-13 20:56:30,018 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:30,018 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 11
+ 2024-05-13 20:56:30,018 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:30,018 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 11
+ 2024-05-13 20:56:30,018 INFO SenderThread:1257957 [file_pusher.py:join():175] waiting for file pusher
+ 2024-05-13 20:56:30,019 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 12
+ 2024-05-13 20:56:30,019 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:30,019 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 12
+ 2024-05-13 20:56:30,019 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:30,019 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 12
+ 2024-05-13 20:56:30,019 INFO SenderThread:1257957 [file_stream.py:finish():601] file stream finish called
+ 2024-05-13 20:56:30,156 INFO SenderThread:1257957 [file_stream.py:finish():605] file stream finish is done
+ 2024-05-13 20:56:30,156 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 13
+ 2024-05-13 20:56:30,156 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:30,156 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 13
+ 2024-05-13 20:56:30,156 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:30,156 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 13
+ 2024-05-13 20:56:30,157 INFO SenderThread:1257957 [sender.py:transition_state():613] send defer: 14
+ 2024-05-13 20:56:30,157 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: defer
+ 2024-05-13 20:56:30,157 INFO HandlerThread:1257957 [handler.py:handle_request_defer():184] handle defer: 14
+ 2024-05-13 20:56:30,157 DEBUG SenderThread:1257957 [sender.py:send():378] send: final
+ 2024-05-13 20:56:30,157 DEBUG SenderThread:1257957 [sender.py:send():378] send: footer
+ 2024-05-13 20:56:30,157 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: defer
+ 2024-05-13 20:56:30,157 INFO SenderThread:1257957 [sender.py:send_request_defer():609] handle sender defer: 14
+ 2024-05-13 20:56:30,158 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:56:30,158 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:56:30,159 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: poll_exit
+ 2024-05-13 20:56:30,159 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: poll_exit
+ 2024-05-13 20:56:30,159 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: server_info
+ 2024-05-13 20:56:30,159 DEBUG SenderThread:1257957 [sender.py:send_request():405] send_request: server_info
+ 2024-05-13 20:56:30,159 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: get_summary
+ 2024-05-13 20:56:30,161 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: sampled_history
+ 2024-05-13 20:56:30,161 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: internal_messages
+ 2024-05-13 20:56:30,298 INFO MainThread:1257957 [wandb_run.py:_footer_history_summary_info():3994] rendering history
+ 2024-05-13 20:56:30,299 INFO MainThread:1257957 [wandb_run.py:_footer_history_summary_info():4026] rendering summary
+ 2024-05-13 20:56:30,299 INFO MainThread:1257957 [wandb_run.py:_footer_sync_info():3953] logging synced files
+ 2024-05-13 20:56:30,299 DEBUG HandlerThread:1257957 [handler.py:handle_request():158] handle_request: shutdown
+ 2024-05-13 20:56:30,299 INFO HandlerThread:1257957 [handler.py:finish():882] shutting down handler
+ 2024-05-13 20:56:31,159 INFO WriterThread:1257957 [datastore.py:close():296] close: /raid/sanchit/parler-tts-mini-v0.1-expresso-concatenated/wandb/run-20240513_205248-d781ddha/run-d781ddha.wandb
+ 2024-05-13 20:56:31,298 INFO SenderThread:1257957 [sender.py:finish():1545] shutting down sender
+ 2024-05-13 20:56:31,298 INFO SenderThread:1257957 [file_pusher.py:finish():169] shutting down file pusher
+ 2024-05-13 20:56:31,299 INFO SenderThread:1257957 [file_pusher.py:join():175] waiting for file pusher