sanchit-gandhi HF staff commited on
Commit
e1dd1ab
1 Parent(s): c65f244

Model save

Browse files
config.json CHANGED
@@ -182,7 +182,7 @@
182
  "forced_eos_token_id": null,
183
  "gradient_checkpointing": false,
184
  "hidden_act": "gelu",
185
- "hidden_dropout": 0.04999238095195753,
186
  "hidden_size": 1024,
187
  "id2label": {
188
  "0": "LABEL_0",
182
  "forced_eos_token_id": null,
183
  "gradient_checkpointing": false,
184
  "hidden_act": "gelu",
185
+ "hidden_dropout": 0.1043496520848404,
186
  "hidden_size": 1024,
187
  "id2label": {
188
  "0": "LABEL_0",
emissions.csv CHANGED
@@ -2,3 +2,4 @@ timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_
2
  2022-05-05T16:24:44,d1ee5246-da5a-4bbb-a48e-edf347d44452,codecarbon,93789.2286529541,4.4657849913272685,7.885899684491028,USA,USA,Iowa,Y,gcp,us-central1
3
  2022-05-05T16:37:38,3042b653-af40-4cc1-8eec-061528a59ed7,codecarbon,10.825525283813477,0.00029573218373195776,0.0005222182301464909,USA,USA,Iowa,Y,gcp,us-central1
4
  2022-05-05T16:42:47,89692b38-e983-4664-a932-8df61002625d,codecarbon,4.291411638259888,0.00019785712188713343,0.00034938569995962115,USA,USA,Iowa,Y,gcp,us-central1
 
2
  2022-05-05T16:24:44,d1ee5246-da5a-4bbb-a48e-edf347d44452,codecarbon,93789.2286529541,4.4657849913272685,7.885899684491028,USA,USA,Iowa,Y,gcp,us-central1
3
  2022-05-05T16:37:38,3042b653-af40-4cc1-8eec-061528a59ed7,codecarbon,10.825525283813477,0.00029573218373195776,0.0005222182301464909,USA,USA,Iowa,Y,gcp,us-central1
4
  2022-05-05T16:42:47,89692b38-e983-4664-a932-8df61002625d,codecarbon,4.291411638259888,0.00019785712188713343,0.00034938569995962115,USA,USA,Iowa,Y,gcp,us-central1
5
+ 2022-05-05T16:45:58,ef3cf220-dd7d-4101-8f2e-5b58125d0f6a,codecarbon,4.533631801605225,0.00011988175208065735,0.0002116930109141045,USA,USA,Iowa,Y,gcp,us-central1
runs/May05_16-44-56_sanchit--v100/1651769149.9394004/events.out.tfevents.1651769149.sanchit--v100.66677.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf44e1ee4066c7afb2c0126abffcdc62708cfaf679d440482c5b10bdfa09dfed
3
+ size 5184
runs/May05_16-44-56_sanchit--v100/events.out.tfevents.1651769149.sanchit--v100.66677.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf35e831e3ed592f3e7d3c82817cc2768013f4c0662d3bfcd1223cc883fbf3f
3
+ size 10206
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:411ae1993bc0f5fc4d6235ea137d964a9deca9dd463f83c15631f3830fb11955
3
  size 3247
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5922512d0682bbbfa84f9bc7a3647d74eee7510b6f93beb8c552e9e75f8ce9d3
3
  size 3247
wandb/debug-cli.log CHANGED
@@ -132,3 +132,29 @@
132
  warmup_steps: 500
133
  2022-05-05 16:41:33 INFO About to run command: python3 run_xtreme_s.py --overwrite_output_dir --freeze_feature_encoder --gradient_checkpointing --predict_with_generate --fp16 --group_by_length --do_train --do_eval --load_best_model_at_end --push_to_hub --use_auth_token --eval_split_name=test --eval_steps=500 --evaluation_strategy=steps --generation_max_length=40 --generation_num_beams=1 --gradient_accumulation_steps=8 --greater_is_better=True --hidden_dropout=0.04999238095195753 --language=fr.en --learning_rate=0.0007702133913256148 --logging_steps=1 --max_duration_in_seconds=20 --metric_for_best_model=bleu --model_name_or_path=./ --num_train_epochs=3 --output_dir=./ --per_device_eval_batch_size=8 --per_device_train_batch_size=8 --save_steps=500 --task=covost2 --warmup_steps=500
134
  2022-05-05 16:41:38 INFO Running runs: ['1zwo1c2h']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  warmup_steps: 500
133
  2022-05-05 16:41:33 INFO About to run command: python3 run_xtreme_s.py --overwrite_output_dir --freeze_feature_encoder --gradient_checkpointing --predict_with_generate --fp16 --group_by_length --do_train --do_eval --load_best_model_at_end --push_to_hub --use_auth_token --eval_split_name=test --eval_steps=500 --evaluation_strategy=steps --generation_max_length=40 --generation_num_beams=1 --gradient_accumulation_steps=8 --greater_is_better=True --hidden_dropout=0.04999238095195753 --language=fr.en --learning_rate=0.0007702133913256148 --logging_steps=1 --max_duration_in_seconds=20 --metric_for_best_model=bleu --model_name_or_path=./ --num_train_epochs=3 --output_dir=./ --per_device_eval_batch_size=8 --per_device_train_batch_size=8 --save_steps=500 --task=covost2 --warmup_steps=500
134
  2022-05-05 16:41:38 INFO Running runs: ['1zwo1c2h']
135
+ 2022-05-05 16:44:52 INFO Cleaning up finished run: 1zwo1c2h
136
+ 2022-05-05 16:44:53 INFO Agent received command: run
137
+ 2022-05-05 16:44:53 INFO Agent starting run with config:
138
+ eval_split_name: test
139
+ eval_steps: 500
140
+ evaluation_strategy: steps
141
+ generation_max_length: 40
142
+ generation_num_beams: 1
143
+ gradient_accumulation_steps: 8
144
+ greater_is_better: True
145
+ hidden_dropout: 0.1043496520848404
146
+ language: fr.en
147
+ learning_rate: 0.00023215434357723729
148
+ logging_steps: 1
149
+ max_duration_in_seconds: 20
150
+ metric_for_best_model: bleu
151
+ model_name_or_path: ./
152
+ num_train_epochs: 3
153
+ output_dir: ./
154
+ per_device_eval_batch_size: 8
155
+ per_device_train_batch_size: 8
156
+ save_steps: 500
157
+ task: covost2
158
+ warmup_steps: 500
159
+ 2022-05-05 16:44:53 INFO About to run command: python3 run_xtreme_s.py --overwrite_output_dir --freeze_feature_encoder --gradient_checkpointing --predict_with_generate --fp16 --group_by_length --do_train --do_eval --load_best_model_at_end --push_to_hub --use_auth_token --eval_split_name=test --eval_steps=500 --evaluation_strategy=steps --generation_max_length=40 --generation_num_beams=1 --gradient_accumulation_steps=8 --greater_is_better=True --hidden_dropout=0.1043496520848404 --language=fr.en --learning_rate=0.00023215434357723729 --logging_steps=1 --max_duration_in_seconds=20 --metric_for_best_model=bleu --model_name_or_path=./ --num_train_epochs=3 --output_dir=./ --per_device_eval_batch_size=8 --per_device_train_batch_size=8 --save_steps=500 --task=covost2 --warmup_steps=500
160
+ 2022-05-05 16:44:58 INFO Running runs: ['0sgg5024']
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20220505_164238-1zwo1c2h/logs/debug-internal.log
1
+ run-20220505_164550-0sgg5024/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20220505_164238-1zwo1c2h/logs/debug.log
1
+ run-20220505_164550-0sgg5024/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20220505_164238-1zwo1c2h
1
+ run-20220505_164550-0sgg5024
wandb/run-20220505_164238-1zwo1c2h/files/config.yaml CHANGED
@@ -52,7 +52,16 @@ _wandb:
52
  - 5
53
  - 11
54
  - 12
 
 
 
 
 
 
 
55
  3:
 
 
56
  - 13
57
  4: 3.9.5
58
  5: 0.12.10
52
  - 5
53
  - 11
54
  - 12
55
+ 2:
56
+ - 1
57
+ - 2
58
+ - 3
59
+ - 5
60
+ - 11
61
+ - 12
62
  3:
63
+ - 1
64
+ - 7
65
  - 13
66
  4: 3.9.5
67
  5: 0.12.10
wandb/run-20220505_164238-1zwo1c2h/files/output.log CHANGED
@@ -51,3 +51,25 @@ Configuration saved in ./config.json
51
  train_samples_per_second = 71142.69
52
  train_steps_per_second = 1111.529
53
  Model weights saved in ./pytorch_model.bin
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  train_samples_per_second = 71142.69
52
  train_steps_per_second = 1111.529
53
  Model weights saved in ./pytorch_model.bin
54
+ Feature extractor saved in ./preprocessor_config.json
55
+ 05/05/2022 16:44:38 - WARNING - huggingface_hub.repository - To https://huggingface.co/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en
56
+ 1603dfb..c65f244 main -> main
57
+ To https://huggingface.co/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en
58
+ 1603dfb..c65f244 main -> main
59
+ Traceback (most recent call last):
60
+ File "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py", line 972, in <module>
61
+ main()
62
+ File "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py", line 964, in main
63
+ trainer.push_to_hub(**kwargs)
64
+ File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 2944, in push_to_hub
65
+ self.create_model_card(model_name=model_name, **kwargs)
66
+ File "/home/sanchit_huggingface_co/transformers/src/transformers/trainer.py", line 2841, in create_model_card
67
+ training_summary = TrainingSummary.from_trainer(
68
+ File "/home/sanchit_huggingface_co/transformers/src/transformers/modelcard.py", line 611, in from_trainer
69
+ return cls(
70
+ File "<string>", line 16, in __init__
71
+ File "/home/sanchit_huggingface_co/transformers/src/transformers/modelcard.py", line 401, in __post_init__
72
+ info = model_info(self.finetuned_from)
73
+ File "/home/sanchit_huggingface_co/gcp/lib/python3.9/site-packages/huggingface_hub/hf_api.py", line 870, in model_info
74
+ return ModelInfo(**d)
75
+ TypeError: huggingface_hub.hf_api.ModelInfo() argument after ** must be a mapping, not list
wandb/run-20220505_164238-1zwo1c2h/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/train_runtime": 8.7447, "train/train_samples_per_second": 71142.69, "train/train_steps_per_second": 1111.529, "train/total_flos": 0.0, "train/train_loss": 0.0, "train/epoch": 3.0, "train/global_step": 9720, "_runtime": 9, "_timestamp": 1651768967, "_step": 0}
1
+ {"train/train_runtime": 8.7447, "train/train_samples_per_second": 71142.69, "train/train_steps_per_second": 1111.529, "train/total_flos": 0.0, "train/train_loss": 0.0, "train/epoch": 3.0, "train/global_step": 9720, "_runtime": 9, "_timestamp": 1651768967, "_step": 0, "_wandb": {"runtime": 124}}
wandb/run-20220505_164238-1zwo1c2h/logs/debug-internal.log CHANGED
@@ -68,3 +68,113 @@
68
  2022-05-05 16:44:15,988 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: stop_status
69
  2022-05-05 16:44:31,040 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: stop_status
70
  2022-05-05 16:44:31,040 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: stop_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  2022-05-05 16:44:15,988 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: stop_status
69
  2022-05-05 16:44:31,040 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: stop_status
70
  2022-05-05 16:44:31,040 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: stop_status
71
+ 2022-05-05 16:44:40,445 INFO Thread-8 :66061 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/output.log
72
+ 2022-05-05 16:44:42,296 DEBUG SenderThread:66061 [sender.py:send():235] send: stats
73
+ 2022-05-05 16:44:44,447 INFO Thread-8 :66061 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/output.log
74
+ 2022-05-05 16:44:44,719 DEBUG SenderThread:66061 [sender.py:send():235] send: telemetry
75
+ 2022-05-05 16:44:44,719 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
76
+ 2022-05-05 16:44:44,720 DEBUG SenderThread:66061 [sender.py:send():235] send: exit
77
+ 2022-05-05 16:44:44,720 INFO SenderThread:66061 [sender.py:send_exit():371] handling exit code: 1
78
+ 2022-05-05 16:44:44,720 INFO SenderThread:66061 [sender.py:send_exit():373] handling runtime: 124
79
+ 2022-05-05 16:44:44,721 INFO SenderThread:66061 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
80
+ 2022-05-05 16:44:44,721 INFO SenderThread:66061 [sender.py:send_exit():379] send defer
81
+ 2022-05-05 16:44:44,721 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
82
+ 2022-05-05 16:44:44,722 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
83
+ 2022-05-05 16:44:44,722 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 0
84
+ 2022-05-05 16:44:44,722 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
85
+ 2022-05-05 16:44:44,722 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 0
86
+ 2022-05-05 16:44:44,722 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 1
87
+ 2022-05-05 16:44:44,722 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
88
+ 2022-05-05 16:44:44,722 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 1
89
+ 2022-05-05 16:44:44,840 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
90
+ 2022-05-05 16:44:44,840 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
91
+ 2022-05-05 16:44:44,841 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 1
92
+ 2022-05-05 16:44:44,841 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 2
93
+ 2022-05-05 16:44:44,841 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
94
+ 2022-05-05 16:44:44,841 DEBUG SenderThread:66061 [sender.py:send():235] send: stats
95
+ 2022-05-05 16:44:44,842 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
96
+ 2022-05-05 16:44:44,842 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 2
97
+ 2022-05-05 16:44:44,842 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
98
+ 2022-05-05 16:44:44,842 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 2
99
+ 2022-05-05 16:44:44,843 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 3
100
+ 2022-05-05 16:44:44,843 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
101
+ 2022-05-05 16:44:44,843 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 3
102
+ 2022-05-05 16:44:44,843 DEBUG SenderThread:66061 [sender.py:send():235] send: summary
103
+ 2022-05-05 16:44:44,844 INFO SenderThread:66061 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
104
+ 2022-05-05 16:44:44,844 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
105
+ 2022-05-05 16:44:44,844 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 3
106
+ 2022-05-05 16:44:44,844 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 4
107
+ 2022-05-05 16:44:44,844 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
108
+ 2022-05-05 16:44:44,844 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 4
109
+ 2022-05-05 16:44:44,844 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
110
+ 2022-05-05 16:44:44,844 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 4
111
+ 2022-05-05 16:44:44,947 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
112
+ 2022-05-05 16:44:44,948 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 5
113
+ 2022-05-05 16:44:44,948 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
114
+ 2022-05-05 16:44:44,948 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
115
+ 2022-05-05 16:44:44,948 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 5
116
+ 2022-05-05 16:44:44,948 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
117
+ 2022-05-05 16:44:44,949 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 5
118
+ 2022-05-05 16:44:44,949 INFO SenderThread:66061 [dir_watcher.py:finish():283] shutting down directory watcher
119
+ 2022-05-05 16:44:45,050 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
120
+ 2022-05-05 16:44:45,447 INFO Thread-8 :66061 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/config.yaml
121
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/output.log
122
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/wandb-summary.json
123
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:finish():313] scan: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files
124
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/wandb-metadata.json wandb-metadata.json
125
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/output.log output.log
126
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/wandb-summary.json wandb-summary.json
127
+ 2022-05-05 16:44:45,448 INFO SenderThread:66061 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/requirements.txt requirements.txt
128
+ 2022-05-05 16:44:45,451 INFO SenderThread:66061 [dir_watcher.py:finish():327] scan save: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/config.yaml config.yaml
129
+ 2022-05-05 16:44:45,452 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 6
130
+ 2022-05-05 16:44:45,452 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
131
+ 2022-05-05 16:44:45,453 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
132
+ 2022-05-05 16:44:45,453 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 6
133
+ 2022-05-05 16:44:45,456 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
134
+ 2022-05-05 16:44:45,456 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 6
135
+ 2022-05-05 16:44:45,456 INFO SenderThread:66061 [file_pusher.py:finish():177] shutting down file pusher
136
+ 2022-05-05 16:44:45,554 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
137
+ 2022-05-05 16:44:45,554 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
138
+ 2022-05-05 16:44:45,656 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
139
+ 2022-05-05 16:44:45,656 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
140
+ 2022-05-05 16:44:45,705 INFO Thread-12 :66061 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/output.log
141
+ 2022-05-05 16:44:45,716 INFO Thread-15 :66061 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/config.yaml
142
+ 2022-05-05 16:44:45,723 INFO Thread-13 :66061 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/wandb-summary.json
143
+ 2022-05-05 16:44:45,758 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
144
+ 2022-05-05 16:44:45,759 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
145
+ 2022-05-05 16:44:45,834 INFO Thread-14 :66061 [upload_job.py:push():137] Uploaded file /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/files/requirements.txt
146
+ 2022-05-05 16:44:45,861 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
147
+ 2022-05-05 16:44:45,861 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
148
+ 2022-05-05 16:44:45,963 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
149
+ 2022-05-05 16:44:45,963 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
150
+ 2022-05-05 16:44:46,035 INFO Thread-7 :66061 [sender.py:transition_state():392] send defer: 7
151
+ 2022-05-05 16:44:46,035 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
152
+ 2022-05-05 16:44:46,036 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 7
153
+ 2022-05-05 16:44:46,036 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
154
+ 2022-05-05 16:44:46,036 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 7
155
+ 2022-05-05 16:44:46,065 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
156
+ 2022-05-05 16:44:46,159 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 8
157
+ 2022-05-05 16:44:46,159 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
158
+ 2022-05-05 16:44:46,160 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
159
+ 2022-05-05 16:44:46,160 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 8
160
+ 2022-05-05 16:44:46,161 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
161
+ 2022-05-05 16:44:46,161 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 8
162
+ 2022-05-05 16:44:46,161 INFO SenderThread:66061 [sender.py:transition_state():392] send defer: 9
163
+ 2022-05-05 16:44:46,161 DEBUG SenderThread:66061 [sender.py:send():235] send: final
164
+ 2022-05-05 16:44:46,162 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: defer
165
+ 2022-05-05 16:44:46,162 INFO HandlerThread:66061 [handler.py:handle_request_defer():154] handle defer: 9
166
+ 2022-05-05 16:44:46,162 DEBUG SenderThread:66061 [sender.py:send():235] send: footer
167
+ 2022-05-05 16:44:46,162 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: defer
168
+ 2022-05-05 16:44:46,162 INFO SenderThread:66061 [sender.py:send_request_defer():388] handle sender defer: 9
169
+ 2022-05-05 16:44:46,262 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: poll_exit
170
+ 2022-05-05 16:44:46,262 DEBUG SenderThread:66061 [sender.py:send_request():249] send_request: poll_exit
171
+ 2022-05-05 16:44:46,262 INFO SenderThread:66061 [file_pusher.py:join():182] waiting for file pusher
172
+ 2022-05-05 16:44:46,322 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: get_summary
173
+ 2022-05-05 16:44:46,323 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: sampled_history
174
+ 2022-05-05 16:44:46,324 DEBUG HandlerThread:66061 [handler.py:handle_request():131] handle_request: shutdown
175
+ 2022-05-05 16:44:46,325 INFO HandlerThread:66061 [handler.py:finish():739] shutting down handler
176
+ 2022-05-05 16:44:47,162 INFO WriterThread:66061 [datastore.py:close():281] close: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164238-1zwo1c2h/run-1zwo1c2h.wandb
177
+ 2022-05-05 16:44:47,321 INFO SenderThread:66061 [sender.py:finish():1075] shutting down sender
178
+ 2022-05-05 16:44:47,321 INFO SenderThread:66061 [file_pusher.py:finish():177] shutting down file pusher
179
+ 2022-05-05 16:44:47,321 INFO SenderThread:66061 [file_pusher.py:join():182] waiting for file pusher
180
+ 2022-05-05 16:44:47,324 INFO MainThread:66061 [internal.py:handle_exit():79] Internal process exited
wandb/run-20220505_164238-1zwo1c2h/logs/debug.log CHANGED
@@ -26,3 +26,101 @@ config: {}
26
  2022-05-05 16:42:42,806 INFO MainThread:65949 [wandb_init.py:init():651] run started, returning control to user process
27
  2022-05-05 16:42:42,810 INFO MainThread:65949 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, '_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.04999238095195753, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May05_16-41-38_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8}
28
  2022-05-05 16:42:42,815 INFO MainThread:65949 [wandb_watch.py:watch():43] Watching
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  2022-05-05 16:42:42,806 INFO MainThread:65949 [wandb_init.py:init():651] run started, returning control to user process
27
  2022-05-05 16:42:42,810 INFO MainThread:65949 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, '_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.04999238095195753, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May05_16-41-38_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8}
28
  2022-05-05 16:42:42,815 INFO MainThread:65949 [wandb_watch.py:watch():43] Watching
29
+ 2022-05-05 16:44:41,975 INFO MainThread:65949 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1
30
+ 2022-05-05 16:44:41,980 INFO MainThread:65949 [wandb_run.py:_restore():1769] restore
31
+ 2022-05-05 16:44:44,722 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
32
+ wandb_count: 1
33
+ }
34
+ pusher_stats {
35
+ uploaded_bytes: 1971
36
+ total_bytes: 1971
37
+ }
38
+
39
+ 2022-05-05 16:44:44,842 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
40
+ wandb_count: 1
41
+ }
42
+ pusher_stats {
43
+ uploaded_bytes: 1971
44
+ total_bytes: 1971
45
+ }
46
+
47
+ 2022-05-05 16:44:44,949 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
48
+ wandb_count: 1
49
+ }
50
+ pusher_stats {
51
+ uploaded_bytes: 1971
52
+ total_bytes: 1971
53
+ }
54
+
55
+ 2022-05-05 16:44:45,453 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
56
+ wandb_count: 3
57
+ }
58
+ pusher_stats {
59
+ uploaded_bytes: 1971
60
+ total_bytes: 8526
61
+ }
62
+
63
+ 2022-05-05 16:44:45,555 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
64
+ wandb_count: 5
65
+ }
66
+ pusher_stats {
67
+ uploaded_bytes: 22337
68
+ total_bytes: 27371
69
+ }
70
+
71
+ 2022-05-05 16:44:45,657 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
72
+ wandb_count: 5
73
+ }
74
+ pusher_stats {
75
+ uploaded_bytes: 27371
76
+ total_bytes: 27371
77
+ }
78
+
79
+ 2022-05-05 16:44:45,759 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
80
+ wandb_count: 5
81
+ }
82
+ pusher_stats {
83
+ uploaded_bytes: 27371
84
+ total_bytes: 27371
85
+ }
86
+
87
+ 2022-05-05 16:44:45,862 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
88
+ wandb_count: 5
89
+ }
90
+ pusher_stats {
91
+ uploaded_bytes: 27371
92
+ total_bytes: 27371
93
+ }
94
+
95
+ 2022-05-05 16:44:45,964 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
96
+ wandb_count: 5
97
+ }
98
+ pusher_stats {
99
+ uploaded_bytes: 27371
100
+ total_bytes: 27371
101
+ }
102
+
103
+ 2022-05-05 16:44:46,160 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
104
+ wandb_count: 5
105
+ }
106
+ pusher_stats {
107
+ uploaded_bytes: 27371
108
+ total_bytes: 27371
109
+ }
110
+
111
+ 2022-05-05 16:44:46,322 INFO MainThread:65949 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true
112
+ exit_result {
113
+ }
114
+ file_counts {
115
+ wandb_count: 5
116
+ }
117
+ pusher_stats {
118
+ uploaded_bytes: 27371
119
+ total_bytes: 27371
120
+ }
121
+ local_info {
122
+ }
123
+
124
+ 2022-05-05 16:44:47,960 INFO MainThread:65949 [wandb_run.py:_append_history():2144] rendering history
125
+ 2022-05-05 16:44:47,961 INFO MainThread:65949 [wandb_run.py:_append_summary():2102] rendering summary
126
+ 2022-05-05 16:44:47,961 INFO MainThread:65949 [wandb_run.py:_append_files():2194] logging synced files
wandb/run-20220505_164238-1zwo1c2h/run-1zwo1c2h.wandb CHANGED
Binary files a/wandb/run-20220505_164238-1zwo1c2h/run-1zwo1c2h.wandb and b/wandb/run-20220505_164238-1zwo1c2h/run-1zwo1c2h.wandb differ
wandb/run-20220505_164550-0sgg5024/files/config.yaml ADDED
@@ -0,0 +1,788 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _n_gpu:
4
+ desc: null
5
+ value: 1
6
+ _name_or_path:
7
+ desc: null
8
+ value: ./
9
+ _wandb:
10
+ desc: null
11
+ value:
12
+ cli_version: 0.12.10
13
+ framework: huggingface
14
+ huggingface_version: 4.19.0.dev0
15
+ is_jupyter_run: false
16
+ is_kaggle_kernel: false
17
+ m:
18
+ - 1: train/global_step
19
+ 6:
20
+ - 3
21
+ - 1: train/train_runtime
22
+ 5: 1
23
+ 6:
24
+ - 1
25
+ - 1: train/train_samples_per_second
26
+ 5: 1
27
+ 6:
28
+ - 1
29
+ - 1: train/train_steps_per_second
30
+ 5: 1
31
+ 6:
32
+ - 1
33
+ - 1: train/total_flos
34
+ 5: 1
35
+ 6:
36
+ - 1
37
+ - 1: train/train_loss
38
+ 5: 1
39
+ 6:
40
+ - 1
41
+ - 1: train/epoch
42
+ 5: 1
43
+ 6:
44
+ - 1
45
+ python_version: 3.9.5
46
+ start_time: 1651769150
47
+ t:
48
+ 1:
49
+ - 1
50
+ - 2
51
+ - 3
52
+ - 5
53
+ - 11
54
+ - 12
55
+ 3:
56
+ - 13
57
+ 4: 3.9.5
58
+ 5: 0.12.10
59
+ 6: 4.19.0.dev0
60
+ 8:
61
+ - 5
62
+ adafactor:
63
+ desc: null
64
+ value: false
65
+ adam_beta1:
66
+ desc: null
67
+ value: 0.9
68
+ adam_beta2:
69
+ desc: null
70
+ value: 0.999
71
+ adam_epsilon:
72
+ desc: null
73
+ value: 1.0e-08
74
+ add_cross_attention:
75
+ desc: null
76
+ value: false
77
+ architectures:
78
+ desc: null
79
+ value:
80
+ - SpeechEncoderDecoderModel
81
+ bad_words_ids:
82
+ desc: null
83
+ value: null
84
+ bf16:
85
+ desc: null
86
+ value: false
87
+ bf16_full_eval:
88
+ desc: null
89
+ value: false
90
+ bos_token_id:
91
+ desc: null
92
+ value: null
93
+ chunk_size_feed_forward:
94
+ desc: null
95
+ value: 0
96
+ cross_attention_hidden_size:
97
+ desc: null
98
+ value: null
99
+ data_seed:
100
+ desc: null
101
+ value: None
102
+ dataloader_drop_last:
103
+ desc: null
104
+ value: false
105
+ dataloader_num_workers:
106
+ desc: null
107
+ value: 0
108
+ dataloader_pin_memory:
109
+ desc: null
110
+ value: true
111
+ ddp_bucket_cap_mb:
112
+ desc: null
113
+ value: None
114
+ ddp_find_unused_parameters:
115
+ desc: null
116
+ value: None
117
+ debug:
118
+ desc: null
119
+ value: '[]'
120
+ decoder:
121
+ desc: null
122
+ value:
123
+ _name_or_path: facebook/bart-large
124
+ activation_dropout: 0.1
125
+ activation_function: gelu
126
+ add_bias_logits: false
127
+ add_cross_attention: true
128
+ add_final_layer_norm: false
129
+ architectures:
130
+ - BartModel
131
+ attention_dropout: 0.1
132
+ bad_words_ids: null
133
+ bos_token_id: 0
134
+ chunk_size_feed_forward: 0
135
+ classif_dropout: 0.1
136
+ classifier_dropout: 0.0
137
+ cross_attention_hidden_size: null
138
+ d_model: 1024
139
+ decoder_attention_heads: 16
140
+ decoder_ffn_dim: 4096
141
+ decoder_layerdrop: 0.0
142
+ decoder_layers: 12
143
+ decoder_start_token_id: 2
144
+ diversity_penalty: 0.0
145
+ do_sample: false
146
+ dropout: 0.1
147
+ early_stopping: true
148
+ encoder_attention_heads: 16
149
+ encoder_ffn_dim: 4096
150
+ encoder_layerdrop: 0.0
151
+ encoder_layers: 12
152
+ encoder_no_repeat_ngram_size: 0
153
+ eos_token_id: 2
154
+ exponential_decay_length_penalty: null
155
+ finetuning_task: null
156
+ forced_bos_token_id: 0
157
+ forced_eos_token_id: 2
158
+ gradient_checkpointing: false
159
+ id2label:
160
+ '0': LABEL_0
161
+ '1': LABEL_1
162
+ '2': LABEL_2
163
+ init_std: 0.02
164
+ is_decoder: true
165
+ is_encoder_decoder: false
166
+ label2id:
167
+ LABEL_0: 0
168
+ LABEL_1: 1
169
+ LABEL_2: 2
170
+ length_penalty: 1.0
171
+ max_length: 20
172
+ max_position_embeddings: 1024
173
+ min_length: 0
174
+ model_type: bart
175
+ no_repeat_ngram_size: 3
176
+ normalize_before: false
177
+ num_beam_groups: 1
178
+ num_beams: 4
179
+ num_hidden_layers: 12
180
+ num_return_sequences: 1
181
+ output_attentions: false
182
+ output_hidden_states: false
183
+ output_scores: false
184
+ pad_token_id: 1
185
+ prefix: null
186
+ problem_type: null
187
+ pruned_heads: {}
188
+ remove_invalid_values: false
189
+ repetition_penalty: 1.0
190
+ return_dict: true
191
+ return_dict_in_generate: false
192
+ scale_embedding: false
193
+ sep_token_id: null
194
+ task_specific_params:
195
+ summarization:
196
+ length_penalty: 1.0
197
+ max_length: 128
198
+ min_length: 12
199
+ num_beams: 4
200
+ summarization_cnn:
201
+ length_penalty: 2.0
202
+ max_length: 142
203
+ min_length: 56
204
+ num_beams: 4
205
+ summarization_xsum:
206
+ length_penalty: 1.0
207
+ max_length: 62
208
+ min_length: 11
209
+ num_beams: 6
210
+ temperature: 1.0
211
+ tie_encoder_decoder: false
212
+ tie_word_embeddings: true
213
+ tokenizer_class: null
214
+ top_k: 50
215
+ top_p: 1.0
216
+ torch_dtype: null
217
+ torchscript: false
218
+ transformers_version: 4.19.0.dev0
219
+ typical_p: 1.0
220
+ use_bfloat16: false
221
+ use_cache: true
222
+ vocab_size: 50265
223
+ decoder_start_token_id:
224
+ desc: null
225
+ value: 0
226
+ deepspeed:
227
+ desc: null
228
+ value: None
229
+ disable_tqdm:
230
+ desc: null
231
+ value: false
232
+ diversity_penalty:
233
+ desc: null
234
+ value: 0.0
235
+ do_eval:
236
+ desc: null
237
+ value: true
238
+ do_predict:
239
+ desc: null
240
+ value: false
241
+ do_sample:
242
+ desc: null
243
+ value: false
244
+ do_train:
245
+ desc: null
246
+ value: true
247
+ early_stopping:
248
+ desc: null
249
+ value: false
250
+ encoder:
251
+ desc: null
252
+ value:
253
+ _name_or_path: facebook/wav2vec2-xls-r-300m
254
+ activation_dropout: 0.0
255
+ adapter_kernel_size: 3
256
+ adapter_stride: 2
257
+ add_adapter: true
258
+ add_cross_attention: false
259
+ apply_spec_augment: true
260
+ architectures:
261
+ - Wav2Vec2ForPreTraining
262
+ attention_dropout: 0.1
263
+ bad_words_ids: null
264
+ bos_token_id: 1
265
+ chunk_size_feed_forward: 0
266
+ classifier_proj_size: 256
267
+ codevector_dim: 768
268
+ contrastive_logits_temperature: 0.1
269
+ conv_bias: true
270
+ conv_dim:
271
+ - 512
272
+ - 512
273
+ - 512
274
+ - 512
275
+ - 512
276
+ - 512
277
+ - 512
278
+ conv_kernel:
279
+ - 10
280
+ - 3
281
+ - 3
282
+ - 3
283
+ - 3
284
+ - 2
285
+ - 2
286
+ conv_stride:
287
+ - 5
288
+ - 2
289
+ - 2
290
+ - 2
291
+ - 2
292
+ - 2
293
+ - 2
294
+ cross_attention_hidden_size: null
295
+ ctc_loss_reduction: sum
296
+ ctc_zero_infinity: false
297
+ decoder_start_token_id: null
298
+ diversity_loss_weight: 0.1
299
+ diversity_penalty: 0.0
300
+ do_sample: false
301
+ do_stable_layer_norm: true
302
+ early_stopping: false
303
+ encoder_no_repeat_ngram_size: 0
304
+ eos_token_id: 2
305
+ exponential_decay_length_penalty: null
306
+ feat_extract_activation: gelu
307
+ feat_extract_dropout: 0.0
308
+ feat_extract_norm: layer
309
+ feat_proj_dropout: 0.0
310
+ feat_quantizer_dropout: 0.0
311
+ final_dropout: 0.0
312
+ finetuning_task: null
313
+ forced_bos_token_id: null
314
+ forced_eos_token_id: null
315
+ gradient_checkpointing: false
316
+ hidden_act: gelu
317
+ hidden_dropout: 0.1043496520848404
318
+ hidden_size: 1024
319
+ id2label:
320
+ '0': LABEL_0
321
+ '1': LABEL_1
322
+ initializer_range: 0.02
323
+ intermediate_size: 4096
324
+ is_decoder: false
325
+ is_encoder_decoder: false
326
+ label2id:
327
+ LABEL_0: 0
328
+ LABEL_1: 1
329
+ layer_norm_eps: 1.0e-05
330
+ layerdrop: 0.0
331
+ length_penalty: 1.0
332
+ mask_feature_length: 10
333
+ mask_feature_min_masks: 0
334
+ mask_feature_prob: 0.0
335
+ mask_time_length: 10
336
+ mask_time_min_masks: 2
337
+ mask_time_prob: 0.1
338
+ max_length: 20
339
+ min_length: 0
340
+ model_type: wav2vec2
341
+ no_repeat_ngram_size: 0
342
+ num_adapter_layers: 3
343
+ num_attention_heads: 16
344
+ num_beam_groups: 1
345
+ num_beams: 1
346
+ num_codevector_groups: 2
347
+ num_codevectors_per_group: 320
348
+ num_conv_pos_embedding_groups: 16
349
+ num_conv_pos_embeddings: 128
350
+ num_feat_extract_layers: 7
351
+ num_hidden_layers: 24
352
+ num_negatives: 100
353
+ num_return_sequences: 1
354
+ output_attentions: false
355
+ output_hidden_size: 1024
356
+ output_hidden_states: false
357
+ output_scores: false
358
+ pad_token_id: 0
359
+ prefix: null
360
+ problem_type: null
361
+ proj_codevector_dim: 768
362
+ pruned_heads: {}
363
+ remove_invalid_values: false
364
+ repetition_penalty: 1.0
365
+ return_dict: true
366
+ return_dict_in_generate: false
367
+ sep_token_id: null
368
+ task_specific_params: null
369
+ tdnn_dilation:
370
+ - 1
371
+ - 2
372
+ - 3
373
+ - 1
374
+ - 1
375
+ tdnn_dim:
376
+ - 512
377
+ - 512
378
+ - 512
379
+ - 512
380
+ - 1500
381
+ tdnn_kernel:
382
+ - 5
383
+ - 3
384
+ - 3
385
+ - 1
386
+ - 1
387
+ temperature: 1.0
388
+ tie_encoder_decoder: false
389
+ tie_word_embeddings: true
390
+ tokenizer_class: null
391
+ top_k: 50
392
+ top_p: 1.0
393
+ torch_dtype: float32
394
+ torchscript: false
395
+ transformers_version: 4.19.0.dev0
396
+ typical_p: 1.0
397
+ use_bfloat16: false
398
+ use_weighted_layer_sum: false
399
+ vocab_size: 32
400
+ xvector_output_dim: 512
401
+ encoder_no_repeat_ngram_size:
402
+ desc: null
403
+ value: 0
404
+ eos_token_id:
405
+ desc: null
406
+ value: 2
407
+ eval_accumulation_steps:
408
+ desc: null
409
+ value: None
410
+ eval_batch_size:
411
+ desc: null
412
+ value: 8
413
+ eval_delay:
414
+ desc: null
415
+ value: 0
416
+ eval_split_name:
417
+ desc: null
418
+ value: test
419
+ eval_steps:
420
+ desc: null
421
+ value: 500
422
+ evaluation_strategy:
423
+ desc: null
424
+ value: steps
425
+ exponential_decay_length_penalty:
426
+ desc: null
427
+ value: null
428
+ finetuning_task:
429
+ desc: null
430
+ value: null
431
+ forced_bos_token_id:
432
+ desc: null
433
+ value: null
434
+ forced_eos_token_id:
435
+ desc: null
436
+ value: null
437
+ fp16:
438
+ desc: null
439
+ value: true
440
+ fp16_backend:
441
+ desc: null
442
+ value: auto
443
+ fp16_full_eval:
444
+ desc: null
445
+ value: false
446
+ fp16_opt_level:
447
+ desc: null
448
+ value: O1
449
+ generation_max_length:
450
+ desc: null
451
+ value: 40
452
+ generation_num_beams:
453
+ desc: null
454
+ value: 1
455
+ gradient_accumulation_steps:
456
+ desc: null
457
+ value: 8
458
+ gradient_checkpointing:
459
+ desc: null
460
+ value: true
461
+ greater_is_better:
462
+ desc: null
463
+ value: true
464
+ group_by_length:
465
+ desc: null
466
+ value: true
467
+ half_precision_backend:
468
+ desc: null
469
+ value: amp
470
+ hidden_dropout:
471
+ desc: null
472
+ value: 0.1043496520848404
473
+ hub_model_id:
474
+ desc: null
475
+ value: None
476
+ hub_private_repo:
477
+ desc: null
478
+ value: false
479
+ hub_strategy:
480
+ desc: null
481
+ value: every_save
482
+ hub_token:
483
+ desc: null
484
+ value: <HUB_TOKEN>
485
+ id2label:
486
+ desc: null
487
+ value:
488
+ '0': LABEL_0
489
+ '1': LABEL_1
490
+ ignore_data_skip:
491
+ desc: null
492
+ value: false
493
+ include_inputs_for_metrics:
494
+ desc: null
495
+ value: false
496
+ is_decoder:
497
+ desc: null
498
+ value: false
499
+ is_encoder_decoder:
500
+ desc: null
501
+ value: true
502
+ label2id:
503
+ desc: null
504
+ value:
505
+ LABEL_0: 0
506
+ LABEL_1: 1
507
+ label_names:
508
+ desc: null
509
+ value: None
510
+ label_smoothing_factor:
511
+ desc: null
512
+ value: 0.0
513
+ language:
514
+ desc: null
515
+ value: fr.en
516
+ learning_rate:
517
+ desc: null
518
+ value: 0.00023215434357723729
519
+ length_column_name:
520
+ desc: null
521
+ value: length
522
+ length_penalty:
523
+ desc: null
524
+ value: 1.0
525
+ load_best_model_at_end:
526
+ desc: null
527
+ value: true
528
+ local_rank:
529
+ desc: null
530
+ value: -1
531
+ log_level:
532
+ desc: null
533
+ value: -1
534
+ log_level_replica:
535
+ desc: null
536
+ value: -1
537
+ log_on_each_node:
538
+ desc: null
539
+ value: true
540
+ logging_dir:
541
+ desc: null
542
+ value: ./runs/May05_16-44-56_sanchit--v100
543
+ logging_first_step:
544
+ desc: null
545
+ value: false
546
+ logging_nan_inf_filter:
547
+ desc: null
548
+ value: true
549
+ logging_steps:
550
+ desc: null
551
+ value: 1
552
+ logging_strategy:
553
+ desc: null
554
+ value: steps
555
+ lr_scheduler_type:
556
+ desc: null
557
+ value: linear
558
+ max_duration_in_seconds:
559
+ desc: null
560
+ value: 20
561
+ max_grad_norm:
562
+ desc: null
563
+ value: 1.0
564
+ max_length:
565
+ desc: null
566
+ value: 40
567
+ max_steps:
568
+ desc: null
569
+ value: -1
570
+ metric_for_best_model:
571
+ desc: null
572
+ value: bleu
573
+ min_length:
574
+ desc: null
575
+ value: 0
576
+ model_name_or_path:
577
+ desc: null
578
+ value: ./
579
+ model_type:
580
+ desc: null
581
+ value: speech-encoder-decoder
582
+ mp_parameters:
583
+ desc: null
584
+ value: ''
585
+ no_cuda:
586
+ desc: null
587
+ value: false
588
+ no_repeat_ngram_size:
589
+ desc: null
590
+ value: 0
591
+ num_beam_groups:
592
+ desc: null
593
+ value: 1
594
+ num_beams:
595
+ desc: null
596
+ value: 1
597
+ num_return_sequences:
598
+ desc: null
599
+ value: 1
600
+ num_train_epochs:
601
+ desc: null
602
+ value: 3
603
+ optim:
604
+ desc: null
605
+ value: adamw_hf
606
+ output_attentions:
607
+ desc: null
608
+ value: false
609
+ output_dir:
610
+ desc: null
611
+ value: ./
612
+ output_hidden_states:
613
+ desc: null
614
+ value: false
615
+ output_scores:
616
+ desc: null
617
+ value: false
618
+ overwrite_output_dir:
619
+ desc: null
620
+ value: true
621
+ pad_token_id:
622
+ desc: null
623
+ value: 1
624
+ past_index:
625
+ desc: null
626
+ value: -1
627
+ per_device_eval_batch_size:
628
+ desc: null
629
+ value: 8
630
+ per_device_train_batch_size:
631
+ desc: null
632
+ value: 8
633
+ per_gpu_eval_batch_size:
634
+ desc: null
635
+ value: None
636
+ per_gpu_train_batch_size:
637
+ desc: null
638
+ value: None
639
+ predict_with_generate:
640
+ desc: null
641
+ value: true
642
+ prediction_loss_only:
643
+ desc: null
644
+ value: false
645
+ prefix:
646
+ desc: null
647
+ value: null
648
+ problem_type:
649
+ desc: null
650
+ value: null
651
+ processor_class:
652
+ desc: null
653
+ value: Wav2Vec2Processor
654
+ pruned_heads:
655
+ desc: null
656
+ value: {}
657
+ push_to_hub:
658
+ desc: null
659
+ value: true
660
+ push_to_hub_model_id:
661
+ desc: null
662
+ value: None
663
+ push_to_hub_organization:
664
+ desc: null
665
+ value: None
666
+ push_to_hub_token:
667
+ desc: null
668
+ value: <PUSH_TO_HUB_TOKEN>
669
+ remove_invalid_values:
670
+ desc: null
671
+ value: false
672
+ remove_unused_columns:
673
+ desc: null
674
+ value: true
675
+ repetition_penalty:
676
+ desc: null
677
+ value: 1.0
678
+ report_to:
679
+ desc: null
680
+ value: '[''tensorboard'', ''wandb'', ''codecarbon'']'
681
+ resume_from_checkpoint:
682
+ desc: null
683
+ value: None
684
+ return_dict:
685
+ desc: null
686
+ value: true
687
+ return_dict_in_generate:
688
+ desc: null
689
+ value: false
690
+ run_name:
691
+ desc: null
692
+ value: ./
693
+ save_on_each_node:
694
+ desc: null
695
+ value: false
696
+ save_steps:
697
+ desc: null
698
+ value: 500
699
+ save_strategy:
700
+ desc: null
701
+ value: steps
702
+ save_total_limit:
703
+ desc: null
704
+ value: None
705
+ seed:
706
+ desc: null
707
+ value: 42
708
+ sep_token_id:
709
+ desc: null
710
+ value: null
711
+ sharded_ddp:
712
+ desc: null
713
+ value: '[]'
714
+ skip_memory_metrics:
715
+ desc: null
716
+ value: true
717
+ sortish_sampler:
718
+ desc: null
719
+ value: false
720
+ task:
721
+ desc: null
722
+ value: covost2
723
+ task_specific_params:
724
+ desc: null
725
+ value: null
726
+ temperature:
727
+ desc: null
728
+ value: 1.0
729
+ tf32:
730
+ desc: null
731
+ value: None
732
+ tie_encoder_decoder:
733
+ desc: null
734
+ value: false
735
+ tie_word_embeddings:
736
+ desc: null
737
+ value: false
738
+ tokenizer_class:
739
+ desc: null
740
+ value: null
741
+ top_k:
742
+ desc: null
743
+ value: 50
744
+ top_p:
745
+ desc: null
746
+ value: 1.0
747
+ torch_dtype:
748
+ desc: null
749
+ value: torch.float32
750
+ torchscript:
751
+ desc: null
752
+ value: false
753
+ tpu_metrics_debug:
754
+ desc: null
755
+ value: false
756
+ tpu_num_cores:
757
+ desc: null
758
+ value: None
759
+ train_batch_size:
760
+ desc: null
761
+ value: 8
762
+ transformers_version:
763
+ desc: null
764
+ value: null
765
+ typical_p:
766
+ desc: null
767
+ value: 1.0
768
+ use_bfloat16:
769
+ desc: null
770
+ value: false
771
+ use_cache:
772
+ desc: null
773
+ value: false
774
+ use_legacy_prediction_loop:
775
+ desc: null
776
+ value: false
777
+ warmup_ratio:
778
+ desc: null
779
+ value: 0.0
780
+ warmup_steps:
781
+ desc: null
782
+ value: 500
783
+ weight_decay:
784
+ desc: null
785
+ value: 0.0
786
+ xpu_backend:
787
+ desc: null
788
+ value: None
wandb/run-20220505_164550-0sgg5024/files/output.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: WARNING Config item 'output_dir' was locked by 'sweep' (ignored update).
2
+ wandb: WARNING Config item 'evaluation_strategy' was locked by 'sweep' (ignored update).
3
+ wandb: WARNING Config item 'per_device_train_batch_size' was locked by 'sweep' (ignored update).
4
+ wandb: WARNING Config item 'per_device_eval_batch_size' was locked by 'sweep' (ignored update).
5
+ wandb: WARNING Config item 'gradient_accumulation_steps' was locked by 'sweep' (ignored update).
6
+ wandb: WARNING Config item 'learning_rate' was locked by 'sweep' (ignored update).
7
+ wandb: WARNING Config item 'num_train_epochs' was locked by 'sweep' (ignored update).
8
+ wandb: WARNING Config item 'warmup_steps' was locked by 'sweep' (ignored update).
9
+ wandb: WARNING Config item 'logging_steps' was locked by 'sweep' (ignored update).
10
+ wandb: WARNING Config item 'save_steps' was locked by 'sweep' (ignored update).
11
+ wandb: WARNING Config item 'eval_steps' was locked by 'sweep' (ignored update).
12
+ wandb: WARNING Config item 'metric_for_best_model' was locked by 'sweep' (ignored update).
13
+ wandb: WARNING Config item 'greater_is_better' was locked by 'sweep' (ignored update).
14
+ wandb: WARNING Config item 'generation_max_length' was locked by 'sweep' (ignored update).
15
+ wandb: WARNING Config item 'generation_num_beams' was locked by 'sweep' (ignored update).
16
+ 0%| | 0/9720 [00:00<?, ?it/s]
17
+ Training completed. Do not forget to share your model on huggingface.co/models =)
18
+ Loading best model from ./checkpoint-1000 (score: 2.4961869532998874e-13).
19
+ {'train_runtime': 8.4854, 'train_samples_per_second': 73316.525, 'train_steps_per_second': 1145.493, 'train_loss': 0.0, 'epoch': 3.0}
20
+ 0%| | 0/9720 [00:04<?, ?it/s]
21
+ Skipping the first batches: : 0it [00:08, ?it/s] | 0/9720 [00:04<?, ?it/s]
22
+ Saving model checkpoint to ./
23
+ Configuration saved in ./config.json
24
+ Model weights saved in ./pytorch_model.bin
25
+ Feature extractor saved in ./preprocessor_config.json
26
+ Saving model checkpoint to ./
27
+ Configuration saved in ./config.json
28
+ Model weights saved in ./pytorch_model.bin
wandb/run-20220505_164550-0sgg5024/files/requirements.txt ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.0.0
2
+ aiohttp==3.8.1
3
+ aiosignal==1.2.0
4
+ alembic==1.7.7
5
+ anyio==3.5.0
6
+ appdirs==1.4.4
7
+ apscheduler==3.9.1
8
+ argon2-cffi-bindings==21.2.0
9
+ argon2-cffi==21.3.0
10
+ arrow==1.2.2
11
+ asttokens==2.0.5
12
+ astunparse==1.6.3
13
+ async-timeout==4.0.2
14
+ attrs==21.4.0
15
+ audioread==2.1.9
16
+ autopage==0.5.0
17
+ babel==2.9.1
18
+ backcall==0.2.0
19
+ backoff==1.11.1
20
+ binaryornot==0.4.4
21
+ bitsandbytes-cuda113==0.26.0
22
+ black==22.1.0
23
+ bleach==4.1.0
24
+ boto3==1.16.34
25
+ botocore==1.19.63
26
+ brotli==1.0.9
27
+ cachetools==5.0.0
28
+ certifi==2021.10.8
29
+ cffi==1.15.0
30
+ chardet==4.0.0
31
+ charset-normalizer==2.0.11
32
+ chex==0.1.0
33
+ click==8.0.3
34
+ cliff==3.10.1
35
+ clldutils==3.10.1
36
+ cmaes==0.8.2
37
+ cmd2==2.4.0
38
+ codecarbon==1.2.0
39
+ colorlog==6.6.0
40
+ cookiecutter==1.7.3
41
+ cryptography==36.0.2
42
+ csvw==1.11.0
43
+ cycler==0.11.0
44
+ dash-bootstrap-components==1.1.0
45
+ dash-core-components==2.0.0
46
+ dash-html-components==2.0.0
47
+ dash-table==5.0.0
48
+ dash==2.3.1
49
+ datasets==2.1.1.dev0
50
+ debugpy==1.5.1
51
+ decorator==5.1.1
52
+ defusedxml==0.7.1
53
+ deprecated==1.2.13
54
+ dill==0.3.4
55
+ dlinfo==1.2.1
56
+ dm-tree==0.1.6
57
+ docker-pycreds==0.4.0
58
+ docker==4.4.4
59
+ entrypoints==0.4
60
+ execnet==1.9.0
61
+ executing==0.8.2
62
+ faiss-cpu==1.7.2
63
+ filelock==3.4.2
64
+ fire==0.4.0
65
+ flake8==4.0.1
66
+ flask-compress==1.11
67
+ flask==2.1.1
68
+ flatbuffers==1.12
69
+ flax==0.4.0
70
+ fonttools==4.29.1
71
+ frozenlist==1.3.0
72
+ fsspec==2022.1.0
73
+ fugashi==1.1.2
74
+ gast==0.5.3
75
+ gitdb==4.0.9
76
+ gitpython==3.1.18
77
+ google-auth-oauthlib==0.4.6
78
+ google-auth==2.6.0
79
+ google-pasta==0.2.0
80
+ greenlet==1.1.2
81
+ grpcio==1.43.0
82
+ h5py==3.6.0
83
+ hf-doc-builder==0.2.0
84
+ huggingface-hub==0.4.0
85
+ hypothesis==6.36.1
86
+ idna==3.3
87
+ importlib-metadata==4.10.1
88
+ iniconfig==1.1.1
89
+ ipadic==1.0.0
90
+ ipdb==0.13.9
91
+ ipykernel==6.8.0
92
+ ipython-genutils==0.2.0
93
+ ipython==8.0.1
94
+ ipywidgets==7.6.5
95
+ isodate==0.6.1
96
+ isort==5.10.1
97
+ itsdangerous==2.1.2
98
+ jax==0.2.28
99
+ jaxlib==0.1.76+cuda11.cudnn82
100
+ jedi==0.18.1
101
+ jinja2-time==0.2.0
102
+ jinja2==3.0.3
103
+ jiwer==2.3.0
104
+ jmespath==0.10.0
105
+ joblib==1.1.0
106
+ json5==0.9.6
107
+ jsonschema==4.4.0
108
+ jupyter-client==7.1.2
109
+ jupyter-console==6.4.0
110
+ jupyter-core==4.9.1
111
+ jupyter-server==1.13.5
112
+ jupyter==1.0.0
113
+ jupyterlab-pygments==0.1.2
114
+ jupyterlab-server==2.10.3
115
+ jupyterlab-widgets==1.0.2
116
+ jupyterlab==3.2.9
117
+ keras-preprocessing==1.1.2
118
+ keras==2.8.0
119
+ kiwisolver==1.3.2
120
+ kubernetes==12.0.1
121
+ libclang==13.0.0
122
+ librosa==0.8.1
123
+ llvmlite==0.38.0
124
+ mako==1.2.0
125
+ markdown==3.3.6
126
+ markupsafe==2.0.1
127
+ matplotlib-inline==0.1.3
128
+ matplotlib==3.5.1
129
+ mccabe==0.6.1
130
+ mistune==0.8.4
131
+ msgpack==1.0.3
132
+ multidict==6.0.2
133
+ multiprocess==0.70.12.2
134
+ mypy-extensions==0.4.3
135
+ nbclassic==0.3.5
136
+ nbclient==0.5.10
137
+ nbconvert==6.4.1
138
+ nbformat==5.1.3
139
+ nest-asyncio==1.5.4
140
+ nltk==3.7
141
+ notebook==6.4.8
142
+ numba==0.55.1
143
+ numpy==1.21.5
144
+ oauthlib==3.2.0
145
+ onnx==1.11.0
146
+ onnxconverter-common==1.9.0
147
+ opt-einsum==3.3.0
148
+ optax==0.1.0
149
+ optuna==2.10.0
150
+ packaging==21.3
151
+ pandas==1.4.0
152
+ pandocfilters==1.5.0
153
+ parameterized==0.8.1
154
+ parso==0.8.3
155
+ pathspec==0.9.0
156
+ pathtools==0.1.2
157
+ pbr==5.8.1
158
+ pexpect==4.8.0
159
+ phonemizer==3.0.1
160
+ pickleshare==0.7.5
161
+ pillow==9.0.0
162
+ pint==0.16.1
163
+ pip==22.0.2
164
+ pkg-resources==0.0.0
165
+ plac==1.3.5
166
+ platformdirs==2.4.1
167
+ plotly==5.6.0
168
+ pluggy==1.0.0
169
+ pooch==1.6.0
170
+ portalocker==2.0.0
171
+ poyo==0.5.0
172
+ prettytable==3.2.0
173
+ prometheus-client==0.13.1
174
+ promise==2.3
175
+ prompt-toolkit==3.0.26
176
+ protobuf==3.19.4
177
+ psutil==5.9.0
178
+ ptyprocess==0.7.0
179
+ pure-eval==0.2.2
180
+ py-cpuinfo==8.0.0
181
+ py==1.11.0
182
+ pyarrow==6.0.1
183
+ pyasn1-modules==0.2.8
184
+ pyasn1==0.4.8
185
+ pycodestyle==2.8.0
186
+ pycparser==2.21
187
+ pyctcdecode==0.3.0
188
+ pyflakes==2.4.0
189
+ pygments==2.11.2
190
+ pygtrie==2.4.2
191
+ pynvml==11.4.1
192
+ pyopenssl==22.0.0
193
+ pyparsing==3.0.7
194
+ pyperclip==1.8.2
195
+ pypng==0.0.21
196
+ pyrsistent==0.18.1
197
+ pytest-forked==1.4.0
198
+ pytest-timeout==2.1.0
199
+ pytest-xdist==2.5.0
200
+ pytest==7.1.1
201
+ python-dateutil==2.8.2
202
+ python-levenshtein==0.12.2
203
+ python-slugify==6.1.1
204
+ pytz-deprecation-shim==0.1.0.post0
205
+ pytz==2021.3
206
+ pyyaml==5.4.1
207
+ pyzmq==22.3.0
208
+ qtconsole==5.2.2
209
+ qtpy==2.0.1
210
+ ray==1.11.0
211
+ redis==4.2.2
212
+ regex==2022.1.18
213
+ requests-oauthlib==1.3.1
214
+ requests==2.27.1
215
+ resampy==0.2.2
216
+ responses==0.18.0
217
+ rfc3986==2.0.0
218
+ rouge-score==0.0.4
219
+ rsa==4.8
220
+ s3transfer==0.3.7
221
+ sacrebleu==1.5.1
222
+ sacremoses==0.0.47
223
+ scikit-learn==1.0.2
224
+ scipy==1.7.3
225
+ segments==2.2.0
226
+ send2trash==1.8.0
227
+ sentencepiece==0.1.96
228
+ sentry-sdk==1.5.6
229
+ setuptools==44.1.1
230
+ shortuuid==1.0.8
231
+ sigopt==8.3.0
232
+ six==1.16.0
233
+ smmap==5.0.0
234
+ sniffio==1.2.0
235
+ sortedcontainers==2.4.0
236
+ soundfile==0.10.3.post1
237
+ sqlalchemy==1.4.34
238
+ stack-data==0.1.4
239
+ stevedore==3.5.0
240
+ tabulate==0.8.9
241
+ tenacity==8.0.1
242
+ tensorboard-data-server==0.6.1
243
+ tensorboard-plugin-wit==1.8.1
244
+ tensorboard==2.8.0
245
+ tensorboardx==2.5
246
+ tensorflow-io-gcs-filesystem==0.24.0
247
+ tensorflow==2.8.0
248
+ termcolor==1.1.0
249
+ terminado==0.13.1
250
+ testpath==0.5.0
251
+ text-unidecode==1.3
252
+ tf-estimator-nightly==2.8.0.dev2021122109
253
+ tf2onnx==1.9.3
254
+ threadpoolctl==3.1.0
255
+ timeout-decorator==0.5.0
256
+ timm==0.5.4
257
+ tokenizers==0.11.4
258
+ toml==0.10.2
259
+ tomli==2.0.0
260
+ toolz==0.11.2
261
+ torch==1.10.2+cu113
262
+ torchaudio==0.10.2+cu113
263
+ torchvision==0.11.3
264
+ tornado==6.1
265
+ tqdm==4.62.3
266
+ traitlets==5.1.1
267
+ transformers==4.18.0.dev0
268
+ typing-extensions==3.10.0.2
269
+ tzdata==2022.1
270
+ tzlocal==4.2
271
+ unidic-lite==1.0.8
272
+ unidic==1.1.0
273
+ uritemplate==4.1.1
274
+ urllib3==1.26.8
275
+ wandb==0.12.10
276
+ wasabi==0.9.1
277
+ wcwidth==0.2.5
278
+ webencodings==0.5.1
279
+ websocket-client==1.2.3
280
+ werkzeug==2.0.2
281
+ wheel==0.37.1
282
+ widgetsnbextension==3.5.2
283
+ wrapt==1.14.0
284
+ xxhash==2.0.2
285
+ yarl==1.7.2
286
+ yaspin==2.1.0
287
+ zipp==3.7.0
wandb/run-20220505_164550-0sgg5024/files/wandb-metadata.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33",
3
+ "python": "3.9.5",
4
+ "heartbeatAt": "2022-05-05T16:45:53.812892",
5
+ "startedAt": "2022-05-05T16:45:50.222861",
6
+ "docker": null,
7
+ "gpu": "Tesla V100-SXM2-16GB",
8
+ "gpu_count": 2,
9
+ "cpu_count": 16,
10
+ "cuda": null,
11
+ "args": [
12
+ "--overwrite_output_dir",
13
+ "--freeze_feature_encoder",
14
+ "--gradient_checkpointing",
15
+ "--predict_with_generate",
16
+ "--fp16",
17
+ "--group_by_length",
18
+ "--do_train",
19
+ "--do_eval",
20
+ "--load_best_model_at_end",
21
+ "--push_to_hub",
22
+ "--use_auth_token",
23
+ "--eval_split_name=test",
24
+ "--eval_steps=500",
25
+ "--evaluation_strategy=steps",
26
+ "--generation_max_length=40",
27
+ "--generation_num_beams=1",
28
+ "--gradient_accumulation_steps=8",
29
+ "--greater_is_better=True",
30
+ "--hidden_dropout=0.1043496520848404",
31
+ "--language=fr.en",
32
+ "--learning_rate=0.00023215434357723729",
33
+ "--logging_steps=1",
34
+ "--max_duration_in_seconds=20",
35
+ "--metric_for_best_model=bleu",
36
+ "--model_name_or_path=./",
37
+ "--num_train_epochs=3",
38
+ "--output_dir=./",
39
+ "--per_device_eval_batch_size=8",
40
+ "--per_device_train_batch_size=8",
41
+ "--save_steps=500",
42
+ "--task=covost2",
43
+ "--warmup_steps=500"
44
+ ],
45
+ "state": "running",
46
+ "program": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py",
47
+ "codePath": "run_xtreme_s.py",
48
+ "git": {
49
+ "remote": "https://huggingface.co/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en",
50
+ "commit": "c65f244178189fecf00ca91b5a669156c1a1e311"
51
+ },
52
+ "email": "sanchit@huggingface.co",
53
+ "root": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en",
54
+ "host": "sanchit--v100",
55
+ "username": "sanchit_huggingface_co",
56
+ "executable": "/home/sanchit_huggingface_co/gcp/bin/python3"
57
+ }
wandb/run-20220505_164550-0sgg5024/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
1
+ {"train/train_runtime": 8.4854, "train/train_samples_per_second": 73316.525, "train/train_steps_per_second": 1145.493, "train/total_flos": 0.0, "train/train_loss": 0.0, "train/epoch": 3.0, "train/global_step": 9720, "_runtime": 8, "_timestamp": 1651769158, "_step": 0}
wandb/run-20220505_164550-0sgg5024/logs/debug-internal.log ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2022-05-05 16:45:51,278 INFO MainThread:66789 [internal.py:wandb_internal():89] W&B internal server running at pid: 66789, started at: 2022-05-05 16:45:51.278170
2
+ 2022-05-05 16:45:51,280 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: check_version
3
+ 2022-05-05 16:45:51,280 INFO WriterThread:66789 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/run-0sgg5024.wandb
4
+ 2022-05-05 16:45:51,282 DEBUG SenderThread:66789 [sender.py:send():235] send: header
5
+ 2022-05-05 16:45:51,282 DEBUG SenderThread:66789 [sender.py:send_request():249] send_request: check_version
6
+ 2022-05-05 16:45:51,347 DEBUG SenderThread:66789 [sender.py:send():235] send: run
7
+ 2022-05-05 16:45:51,468 INFO SenderThread:66789 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files
8
+ 2022-05-05 16:45:51,468 INFO SenderThread:66789 [sender.py:_start_run_threads():809] run started: 0sgg5024 with start time 1651769150
9
+ 2022-05-05 16:45:51,468 DEBUG SenderThread:66789 [sender.py:send():235] send: summary
10
+ 2022-05-05 16:45:51,469 INFO SenderThread:66789 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
11
+ 2022-05-05 16:45:51,470 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: run_start
12
+ 2022-05-05 16:45:52,471 INFO Thread-8 :66789 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/wandb-summary.json
13
+ 2022-05-05 16:45:53,812 DEBUG HandlerThread:66789 [meta.py:__init__():36] meta init
14
+ 2022-05-05 16:45:53,812 DEBUG HandlerThread:66789 [meta.py:__init__():50] meta init done
15
+ 2022-05-05 16:45:53,812 DEBUG HandlerThread:66789 [meta.py:probe():210] probe
16
+ 2022-05-05 16:45:53,819 DEBUG HandlerThread:66789 [meta.py:_setup_git():200] setup git
17
+ 2022-05-05 16:45:53,850 DEBUG HandlerThread:66789 [meta.py:_setup_git():207] setup git done
18
+ 2022-05-05 16:45:53,850 DEBUG HandlerThread:66789 [meta.py:_save_pip():54] save pip
19
+ 2022-05-05 16:45:53,851 DEBUG HandlerThread:66789 [meta.py:_save_pip():68] save pip done
20
+ 2022-05-05 16:45:53,851 DEBUG HandlerThread:66789 [meta.py:probe():248] probe done
21
+ 2022-05-05 16:45:53,854 DEBUG SenderThread:66789 [sender.py:send():235] send: files
22
+ 2022-05-05 16:45:53,854 INFO SenderThread:66789 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now
23
+ 2022-05-05 16:45:53,860 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: stop_status
24
+ 2022-05-05 16:45:53,861 DEBUG SenderThread:66789 [sender.py:send_request():249] send_request: stop_status
25
+ 2022-05-05 16:45:53,902 DEBUG SenderThread:66789 [sender.py:send():235] send: config
26
+ 2022-05-05 16:45:53,903 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
27
+ 2022-05-05 16:45:53,903 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
28
+ 2022-05-05 16:45:53,903 WARNING SenderThread:66789 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen)
29
+ 2022-05-05 16:45:54,116 INFO Thread-11 :66789 [upload_job.py:push():137] Uploaded file /tmp/tmplrauj_x0wandb/1ian821r-wandb-metadata.json
30
+ 2022-05-05 16:45:54,472 INFO Thread-8 :66789 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/wandb-metadata.json
31
+ 2022-05-05 16:45:54,473 INFO Thread-8 :66789 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/requirements.txt
32
+ 2022-05-05 16:45:54,473 INFO Thread-8 :66789 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
33
+ 2022-05-05 16:45:56,473 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
34
+ 2022-05-05 16:45:58,398 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
35
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
36
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
37
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
38
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
39
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: metric
40
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: history
41
+ 2022-05-05 16:45:58,399 DEBUG SenderThread:66789 [sender.py:send():235] send: summary
42
+ 2022-05-05 16:45:58,400 INFO SenderThread:66789 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
43
+ 2022-05-05 16:45:58,474 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
44
+ 2022-05-05 16:45:58,474 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/wandb-summary.json
45
+ 2022-05-05 16:46:00,475 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
46
+ 2022-05-05 16:46:08,914 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: stop_status
47
+ 2022-05-05 16:46:08,915 DEBUG SenderThread:66789 [sender.py:send_request():249] send_request: stop_status
48
+ 2022-05-05 16:46:12,479 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
49
+ 2022-05-05 16:46:22,227 DEBUG SenderThread:66789 [sender.py:send():235] send: stats
50
+ 2022-05-05 16:46:22,484 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/config.yaml
51
+ 2022-05-05 16:46:23,956 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: stop_status
52
+ 2022-05-05 16:46:23,956 DEBUG SenderThread:66789 [sender.py:send_request():249] send_request: stop_status
53
+ 2022-05-05 16:46:24,484 INFO Thread-8 :66789 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/files/output.log
54
+ 2022-05-05 16:46:38,992 DEBUG HandlerThread:66789 [handler.py:handle_request():131] handle_request: stop_status
55
+ 2022-05-05 16:46:38,992 DEBUG SenderThread:66789 [sender.py:send_request():249] send_request: stop_status
wandb/run-20220505_164550-0sgg5024/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2022-05-05 16:45:50,224 INFO MainThread:66677 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings
2
+ 2022-05-05 16:45:50,224 INFO MainThread:66677 [wandb_setup.py:_flush():75] Loading settings from wandb/settings
3
+ 2022-05-05 16:45:50,224 INFO MainThread:66677 [wandb_setup.py:_flush():75] Loading settings from environment variables: {'entity': 'sanchit-gandhi', 'project': 'xtreme_s_xlsr_2_bart_covost2_fr_en', 'sweep_id': 'pvyx3mpp', 'root_dir': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en', 'run_id': '0sgg5024', 'sweep_param_path': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/sweep-pvyx3mpp/config-0sgg5024.yaml'}
4
+ 2022-05-05 16:45:50,224 INFO MainThread:66677 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_xtreme_s.py', 'program': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py'}
5
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/logs/debug.log
6
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_164550-0sgg5024/logs/debug-internal.log
7
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [wandb_init.py:init():420] calling init triggers
8
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [wandb_init.py:init():425] wandb.init called with sweep_config: {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.1043496520848404, 'language': 'fr.en', 'learning_rate': 0.00023215434357723729, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500}
9
+ config: {}
10
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [wandb_init.py:init():471] starting backend
11
+ 2022-05-05 16:45:50,225 INFO MainThread:66677 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
12
+ 2022-05-05 16:45:50,306 INFO MainThread:66677 [backend.py:ensure_launched():219] starting backend process...
13
+ 2022-05-05 16:45:50,387 INFO MainThread:66677 [backend.py:ensure_launched():224] started backend process with pid: 66789
14
+ 2022-05-05 16:45:50,389 INFO MainThread:66677 [wandb_init.py:init():480] backend started and connected
15
+ 2022-05-05 16:45:50,391 INFO MainThread:66677 [wandb_run.py:_config_callback():966] config_cb None None {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.1043496520848404, 'language': 'fr.en', 'learning_rate': 0.00023215434357723729, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500}
16
+ 2022-05-05 16:45:50,405 INFO MainThread:66677 [wandb_init.py:init():550] updated telemetry
17
+ 2022-05-05 16:45:50,597 INFO MainThread:66677 [wandb_init.py:init():581] communicating current version
18
+ 2022-05-05 16:45:51,345 INFO MainThread:66677 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.16 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
19
+
20
+ 2022-05-05 16:45:51,345 INFO MainThread:66677 [wandb_init.py:init():596] communicating run to backend with 30 second timeout
21
+ 2022-05-05 16:45:51,469 INFO MainThread:66677 [wandb_init.py:init():624] starting run threads in backend
22
+ 2022-05-05 16:45:53,860 INFO MainThread:66677 [wandb_run.py:_console_start():1827] atexit reg
23
+ 2022-05-05 16:45:53,861 INFO MainThread:66677 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT
24
+ 2022-05-05 16:45:53,861 INFO MainThread:66677 [wandb_run.py:_redirect():1706] Redirecting console.
25
+ 2022-05-05 16:45:53,863 INFO MainThread:66677 [wandb_run.py:_redirect():1762] Redirects installed.
26
+ 2022-05-05 16:45:53,863 INFO MainThread:66677 [wandb_init.py:init():651] run started, returning control to user process
27
+ 2022-05-05 16:45:53,866 INFO MainThread:66677 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, '_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.1043496520848404, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May05_16-44-56_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8}
28
+ 2022-05-05 16:45:53,868 INFO MainThread:66677 [wandb_watch.py:watch():43] Watching
wandb/run-20220505_164550-0sgg5024/run-0sgg5024.wandb ADDED
Binary file (10 kB). View file
wandb/sweep-pvyx3mpp/config-0sgg5024.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ eval_split_name:
4
+ value: test
5
+ eval_steps:
6
+ value: 500
7
+ evaluation_strategy:
8
+ value: steps
9
+ generation_max_length:
10
+ value: 40
11
+ generation_num_beams:
12
+ value: 1
13
+ gradient_accumulation_steps:
14
+ value: 8
15
+ greater_is_better:
16
+ value: true
17
+ hidden_dropout:
18
+ value: 0.1043496520848404
19
+ language:
20
+ value: fr.en
21
+ learning_rate:
22
+ value: 0.00023215434357723729
23
+ logging_steps:
24
+ value: 1
25
+ max_duration_in_seconds:
26
+ value: 20
27
+ metric_for_best_model:
28
+ value: bleu
29
+ model_name_or_path:
30
+ value: ./
31
+ num_train_epochs:
32
+ value: 3
33
+ output_dir:
34
+ value: ./
35
+ per_device_eval_batch_size:
36
+ value: 8
37
+ per_device_train_batch_size:
38
+ value: 8
39
+ save_steps:
40
+ value: 500
41
+ task:
42
+ value: covost2
43
+ warmup_steps:
44
+ value: 500