sanchit-gandhi committed on
Commit f024e72
1 Parent(s): 0bc8bde

Model save

config.json CHANGED
@@ -182,7 +182,7 @@
  "forced_eos_token_id": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
- "hidden_dropout": 0.035938233699532036,
+ "hidden_dropout": 0.18004101365999406,
  "hidden_size": 1024,
  "id2label": {
  "0": "LABEL_0",
emissions.csv CHANGED
@@ -1,2 +1,3 @@
  timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
  2022-05-05T16:24:44,d1ee5246-da5a-4bbb-a48e-edf347d44452,codecarbon,93789.2286529541,4.4657849913272685,7.885899684491028,USA,USA,Iowa,Y,gcp,us-central1
+ 2022-05-05T16:37:38,3042b653-af40-4cc1-8eec-061528a59ed7,codecarbon,10.825525283813477,0.00029573218373195776,0.0005222182301464909,USA,USA,Iowa,Y,gcp,us-central1
runs/May05_16-32-27_sanchit--v100/1651768642.7722313/events.out.tfevents.1651768642.sanchit--v100.65172.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fe9010d25b5426ae74e1154e5be2727f4b9d3dc3d3ac8423b4e36ab102ca5b1
+ size 5184
runs/May05_16-32-27_sanchit--v100/events.out.tfevents.1651768642.sanchit--v100.65172.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dffb33c2552c28d9cdf40229f277c5ce3d0f56c41496a3ad6c1fcdd0e08ec2d4
+ size 10206
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:54311328073baaf4c9576b62de98f28bcc986e980c3c849a9409edd618d1e27f
+ oid sha256:f53a6a57e9108be1580a99fd7bb15879abb87d024ec5fa14575c3b7fb1db16f0
  size 3247
wandb/debug-cli.log CHANGED
@@ -80,3 +80,29 @@
  2022-05-05 09:24:03 ERROR {"errors":[{"message":"context deadline exceeded"}]}
  2022-05-05 09:25:36 ERROR 500 response executing GraphQL.
  2022-05-05 09:25:36 ERROR {"errors":[{"message":"context deadline exceeded"}]}
+ 2022-05-05 16:32:16 INFO Cleaning up finished run: 1tmxz74i
+ 2022-05-05 16:32:16 INFO Agent received command: run
+ 2022-05-05 16:32:16 INFO Agent starting run with config:
+ eval_split_name: test
+ eval_steps: 500
+ evaluation_strategy: steps
+ generation_max_length: 40
+ generation_num_beams: 1
+ gradient_accumulation_steps: 8
+ greater_is_better: True
+ hidden_dropout: 0.18004101365999406
+ language: fr.en
+ learning_rate: 0.0002757119755681108
+ logging_steps: 1
+ max_duration_in_seconds: 20
+ metric_for_best_model: bleu
+ model_name_or_path: ./
+ num_train_epochs: 3
+ output_dir: ./
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 8
+ save_steps: 500
+ task: covost2
+ warmup_steps: 500
+ 2022-05-05 16:32:16 INFO About to run command: python3 run_xtreme_s.py --overwrite_output_dir --freeze_feature_encoder --gradient_checkpointing --predict_with_generate --fp16 --group_by_length --do_train --do_eval --load_best_model_at_end --push_to_hub --use_auth_token --eval_split_name=test --eval_steps=500 --evaluation_strategy=steps --generation_max_length=40 --generation_num_beams=1 --gradient_accumulation_steps=8 --greater_is_better=True --hidden_dropout=0.18004101365999406 --language=fr.en --learning_rate=0.0002757119755681108 --logging_steps=1 --max_duration_in_seconds=20 --metric_for_best_model=bleu --model_name_or_path=./ --num_train_epochs=3 --output_dir=./ --per_device_eval_batch_size=8 --per_device_train_batch_size=8 --save_steps=500 --task=covost2 --warmup_steps=500
+ 2022-05-05 16:32:22 INFO Running runs: ['urfp82ib']
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
- run-20220504_142129-1tmxz74i/logs/debug-internal.log
+ run-20220505_163723-urfp82ib/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
- run-20220504_142129-1tmxz74i/logs/debug.log
+ run-20220505_163723-urfp82ib/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
- run-20220504_142129-1tmxz74i
+ run-20220505_163723-urfp82ib
wandb/run-20220504_142129-1tmxz74i/files/config.yaml CHANGED
@@ -8672,7 +8672,16 @@ _wandb:
  - 5
  - 11
  - 12
+ 2:
+ - 1
+ - 2
+ - 3
+ - 5
+ - 11
+ - 12
  3:
+ - 1
+ - 7
  - 13
  4: 3.9.5
  5: 0.12.10
wandb/run-20220504_142129-1tmxz74i/files/output.log CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e18202a1774364d9bce0ad6eaa7228beac26c34380f4e6e745afa0fe739feae8
- size 20977130
+ oid sha256:1d1786cfcbc8ee6cb2c762b8872e52a8706b47ba5aefb46b3b3e33372a571f67
+ size 20996768
wandb/run-20220504_142129-1tmxz74i/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
wandb/run-20220504_142129-1tmxz74i/logs/debug-internal.log CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91cc8d775a6d70e558fd760ecd96b3f748bd6f4161c4eb4b573a07aa34fe57dc
- size 26053100
+ oid sha256:9ca0a284494378c54882b003ce1468e806df7176e30a9d1763d9e6b9d44e52e1
+ size 26078926
wandb/run-20220504_142129-1tmxz74i/logs/debug.log CHANGED
@@ -26,3 +26,165 @@ config: {}
  2022-05-04 14:21:35,284 INFO MainThread:50375 [wandb_init.py:init():651] run started, returning control to user process
  2022-05-04 14:21:35,288 INFO MainThread:50375 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, 
'_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.035938233699532036, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 
'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May04_13-30-37_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8}
  2022-05-04 14:21:35,292 INFO MainThread:50375 [wandb_watch.py:watch():43] Watching
+ 2022-05-05 16:31:21,082 INFO MainThread:50375 [wandb_run.py:_atexit_cleanup():1797] got exitcode: 1
+ 2022-05-05 16:31:21,089 INFO MainThread:50375 [wandb_run.py:_restore():1769] restore
+ 2022-05-05 16:31:23,715 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 1972
+ }
+
+ 2022-05-05 16:31:23,834 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 1972
+ }
+
+ 2022-05-05 16:31:23,967 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 1972
+ }
+
+ 2022-05-05 16:31:24,862 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 1972
+ }
+
+ 2022-05-05 16:31:25,840 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:25,942 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 1972
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,045 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 12347603
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,149 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,251 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,354 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,456 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,558 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,660 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 21261171
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,762 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,864 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:26,966 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:27,068 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:27,223 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+
+ 2022-05-05 16:31:27,397 INFO MainThread:50375 [wandb_run.py:_wait_for_finish():1929] got exit ret: done: true
+ exit_result {
+ }
+ file_counts {
+ wandb_count: 5
+ }
+ pusher_stats {
+ uploaded_bytes: 22539656
+ total_bytes: 22539656
+ }
+ local_info {
+ }
+
+ 2022-05-05 16:31:29,982 INFO MainThread:50375 [wandb_run.py:_append_history():2144] rendering history
+ 2022-05-05 16:31:29,983 INFO MainThread:50375 [wandb_run.py:_append_summary():2102] rendering summary
+ 2022-05-05 16:31:29,984 INFO MainThread:50375 [wandb_run.py:_append_files():2194] logging synced files
wandb/run-20220504_142129-1tmxz74i/run-1tmxz74i.wandb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2baef3bd136aaa11e389d892bacb30c5787387e9c38e8dbd3aa9b4ef5fc2047d
- size 1047705346
+ oid sha256:a48d6c37d8c3c4ab228d97cd5a6b8130cc17aec3c854afb79602452e30cbfe08
+ size 1049011321
wandb/run-20220505_163723-urfp82ib/files/config.yaml ADDED
@@ -0,0 +1,788 @@
+ wandb_version: 1
+
+ _n_gpu:
+ desc: null
+ value: 1
+ _name_or_path:
+ desc: null
+ value: ./
+ _wandb:
+ desc: null
+ value:
+ cli_version: 0.12.10
+ framework: huggingface
+ huggingface_version: 4.19.0.dev0
+ is_jupyter_run: false
+ is_kaggle_kernel: false
+ m:
+ - 1: train/global_step
+ 6:
+ - 3
+ - 1: train/train_runtime
+ 5: 1
+ 6:
+ - 1
+ - 1: train/train_samples_per_second
+ 5: 1
+ 6:
+ - 1
+ - 1: train/train_steps_per_second
+ 5: 1
+ 6:
+ - 1
+ - 1: train/total_flos
+ 5: 1
+ 6:
+ - 1
+ - 1: train/train_loss
+ 5: 1
+ 6:
+ - 1
+ - 1: train/epoch
+ 5: 1
+ 6:
+ - 1
+ python_version: 3.9.5
+ start_time: 1651768643
+ t:
+ 1:
+ - 1
+ - 2
+ - 3
+ - 5
+ - 11
+ - 12
+ 3:
+ - 13
+ 4: 3.9.5
+ 5: 0.12.10
+ 6: 4.19.0.dev0
+ 8:
+ - 5
+ adafactor:
+ desc: null
+ value: false
+ adam_beta1:
+ desc: null
+ value: 0.9
+ adam_beta2:
+ desc: null
+ value: 0.999
+ adam_epsilon:
+ desc: null
+ value: 1.0e-08
+ add_cross_attention:
+ desc: null
+ value: false
+ architectures:
+ desc: null
+ value:
+ - SpeechEncoderDecoderModel
+ bad_words_ids:
+ desc: null
+ value: null
+ bf16:
+ desc: null
+ value: false
+ bf16_full_eval:
+ desc: null
+ value: false
+ bos_token_id:
+ desc: null
+ value: null
+ chunk_size_feed_forward:
+ desc: null
+ value: 0
+ cross_attention_hidden_size:
+ desc: null
+ value: null
+ data_seed:
+ desc: null
+ value: None
+ dataloader_drop_last:
+ desc: null
+ value: false
+ dataloader_num_workers:
+ desc: null
+ value: 0
+ dataloader_pin_memory:
+ desc: null
+ value: true
+ ddp_bucket_cap_mb:
+ desc: null
+ value: None
+ ddp_find_unused_parameters:
+ desc: null
+ value: None
+ debug:
+ desc: null
+ value: '[]'
+ decoder:
+ desc: null
+ value:
+ _name_or_path: facebook/bart-large
+ activation_dropout: 0.1
+ activation_function: gelu
+ add_bias_logits: false
+ add_cross_attention: true
+ add_final_layer_norm: false
+ architectures:
+ - BartModel
+ attention_dropout: 0.1
+ bad_words_ids: null
+ bos_token_id: 0
+ chunk_size_feed_forward: 0
+ classif_dropout: 0.1
+ classifier_dropout: 0.0
+ cross_attention_hidden_size: null
+ d_model: 1024
+ decoder_attention_heads: 16
+ decoder_ffn_dim: 4096
+ decoder_layerdrop: 0.0
+ decoder_layers: 12
+ decoder_start_token_id: 2
+ diversity_penalty: 0.0
+ do_sample: false
+ dropout: 0.1
+ early_stopping: true
+ encoder_attention_heads: 16
+ encoder_ffn_dim: 4096
+ encoder_layerdrop: 0.0
+ encoder_layers: 12
+ encoder_no_repeat_ngram_size: 0
+ eos_token_id: 2
+ exponential_decay_length_penalty: null
+ finetuning_task: null
+ forced_bos_token_id: 0
+ forced_eos_token_id: 2
+ gradient_checkpointing: false
+ id2label:
+ '0': LABEL_0
+ '1': LABEL_1
+ '2': LABEL_2
+ init_std: 0.02
+ is_decoder: true
+ is_encoder_decoder: false
+ label2id:
+ LABEL_0: 0
+ LABEL_1: 1
+ LABEL_2: 2
+ length_penalty: 1.0
+ max_length: 20
+ max_position_embeddings: 1024
+ min_length: 0
+ model_type: bart
+ no_repeat_ngram_size: 3
+ normalize_before: false
+ num_beam_groups: 1
+ num_beams: 4
+ num_hidden_layers: 12
+ num_return_sequences: 1
+ output_attentions: false
+ output_hidden_states: false
+ output_scores: false
+ pad_token_id: 1
+ prefix: null
+ problem_type: null
+ pruned_heads: {}
+ remove_invalid_values: false
+ repetition_penalty: 1.0
+ return_dict: true
+ return_dict_in_generate: false
+ scale_embedding: false
+ sep_token_id: null
+ task_specific_params:
+ summarization:
+ length_penalty: 1.0
+ max_length: 128
+ min_length: 12
+ num_beams: 4
+ summarization_cnn:
+ length_penalty: 2.0
+ max_length: 142
+ min_length: 56
+ num_beams: 4
+ summarization_xsum:
+ length_penalty: 1.0
+ max_length: 62
+ min_length: 11
+ num_beams: 6
+ temperature: 1.0
+ tie_encoder_decoder: false
+ tie_word_embeddings: true
+ tokenizer_class: null
+ top_k: 50
+ top_p: 1.0
+ torch_dtype: null
+ torchscript: false
+ transformers_version: 4.19.0.dev0
+ typical_p: 1.0
+ use_bfloat16: false
+ use_cache: true
+ vocab_size: 50265
+ decoder_start_token_id:
+ desc: null
+ value: 0
+ deepspeed:
+ desc: null
+ value: None
+ disable_tqdm:
+ desc: null
+ value: false
+ diversity_penalty:
+ desc: null
+ value: 0.0
+ do_eval:
+ desc: null
+ value: true
+ do_predict:
+ desc: null
+ value: false
+ do_sample:
+ desc: null
+ value: false
+ do_train:
+ desc: null
+ value: true
+ early_stopping:
+ desc: null
+ value: false
+ encoder:
+ desc: null
+ value:
+ _name_or_path: facebook/wav2vec2-xls-r-300m
+ activation_dropout: 0.0
+ adapter_kernel_size: 3
+ adapter_stride: 2
+ add_adapter: true
+ add_cross_attention: false
+ apply_spec_augment: true
+ architectures:
+ - Wav2Vec2ForPreTraining
+ attention_dropout: 0.1
+ bad_words_ids: null
+ bos_token_id: 1
+ chunk_size_feed_forward: 0
+ classifier_proj_size: 256
+ codevector_dim: 768
+ contrastive_logits_temperature: 0.1
+ conv_bias: true
+ conv_dim:
+ - 512
+ - 512
+ - 512
+ - 512
+ - 512
+ - 512
+ - 512
+ conv_kernel:
+ - 10
+ - 3
+ - 3
+ - 3
+ - 3
+ - 2
+ - 2
+ conv_stride:
+ - 5
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ cross_attention_hidden_size: null
+ ctc_loss_reduction: sum
+ ctc_zero_infinity: false
+ decoder_start_token_id: null
+ diversity_loss_weight: 0.1
+ diversity_penalty: 0.0
+ do_sample: false
+ do_stable_layer_norm: true
+ early_stopping: false
+ encoder_no_repeat_ngram_size: 0
+ eos_token_id: 2
+ exponential_decay_length_penalty: null
+ feat_extract_activation: gelu
+ feat_extract_dropout: 0.0
+ feat_extract_norm: layer
+ feat_proj_dropout: 0.0
+ feat_quantizer_dropout: 0.0
+ final_dropout: 0.0
+ finetuning_task: null
+ forced_bos_token_id: null
+ forced_eos_token_id: null
+ gradient_checkpointing: false
+ hidden_act: gelu
+ hidden_dropout: 0.18004101365999406
+ hidden_size: 1024
+ id2label:
+ '0': LABEL_0
+ '1': LABEL_1
+ initializer_range: 0.02
+ intermediate_size: 4096
+ is_decoder: false
+ is_encoder_decoder: false
+ label2id:
+ LABEL_0: 0
+ LABEL_1: 1
+ layer_norm_eps: 1.0e-05
+ layerdrop: 0.0
+ length_penalty: 1.0
+ mask_feature_length: 10
+ mask_feature_min_masks: 0
+ mask_feature_prob: 0.0
+ mask_time_length: 10
+ mask_time_min_masks: 2
+ mask_time_prob: 0.1
+ max_length: 20
+ min_length: 0
+ model_type: wav2vec2
+ no_repeat_ngram_size: 0
+ num_adapter_layers: 3
+ num_attention_heads: 16
+ num_beam_groups: 1
+ num_beams: 1
+ num_codevector_groups: 2
+ num_codevectors_per_group: 320
+ num_conv_pos_embedding_groups: 16
+ num_conv_pos_embeddings: 128
+ num_feat_extract_layers: 7
+ num_hidden_layers: 24
+ num_negatives: 100
+ num_return_sequences: 1
+ output_attentions: false
+ output_hidden_size: 1024
+ output_hidden_states: false
+ output_scores: false
+ pad_token_id: 0
+ prefix: null
+ problem_type: null
+ proj_codevector_dim: 768
+ pruned_heads: {}
+ remove_invalid_values: false
+ repetition_penalty: 1.0
+ return_dict: true
+ return_dict_in_generate: false
+ sep_token_id: null
+ task_specific_params: null
+ tdnn_dilation:
+ - 1
+ - 2
+ - 3
+ - 1
+ - 1
+ tdnn_dim:
+ - 512
+ - 512
+ - 512
+ - 512
+ - 1500
+ tdnn_kernel:
+ - 5
+ - 3
+ - 3
+ - 1
+ - 1
+ temperature: 1.0
+ tie_encoder_decoder: false
+ tie_word_embeddings: true
+ tokenizer_class: null
+ top_k: 50
+ top_p: 1.0
+ torch_dtype: float32
+ torchscript: false
+ transformers_version: 4.19.0.dev0
+ typical_p: 1.0
+ use_bfloat16: false
+ use_weighted_layer_sum: false
+ vocab_size: 32
+ xvector_output_dim: 512
+ encoder_no_repeat_ngram_size:
+ desc: null
+ value: 0
+ eos_token_id:
+ desc: null
+ value: 2
+ eval_accumulation_steps:
+ desc: null
+ value: None
+ eval_batch_size:
+ desc: null
+ value: 8
+ eval_delay:
+ desc: null
+ value: 0
+ eval_split_name:
+ desc: null
+ value: test
+ eval_steps:
+ desc: null
+ value: 500
+ evaluation_strategy:
+ desc: null
+ value: steps
+ exponential_decay_length_penalty:
+ desc: null
+ value: null
+ finetuning_task:
+ desc: null
+ value: null
+ forced_bos_token_id:
+ desc: null
+ value: null
+ forced_eos_token_id:
+ desc: null
+ value: null
+ fp16:
+ desc: null
+ value: true
+ fp16_backend:
+ desc: null
+ value: auto
+ fp16_full_eval:
+ desc: null
+ value: false
+ fp16_opt_level:
+ desc: null
+ value: O1
+ generation_max_length:
+ desc: null
+ value: 40
+ generation_num_beams:
+ desc: null
+ value: 1
+ gradient_accumulation_steps:
+ desc: null
+ value: 8
+ gradient_checkpointing:
+ desc: null
+ value: true
+ greater_is_better:
+ desc: null
+ value: true
+ group_by_length:
+ desc: null
+ value: true
+ half_precision_backend:
+ desc: null
+ value: amp
+ hidden_dropout:
+ desc: null
+ value: 0.18004101365999406
+ hub_model_id:
+ desc: null
+ value: None
+ hub_private_repo:
+ desc: null
+ value: false
+ hub_strategy:
+ desc: null
+ value: every_save
+ hub_token:
+ desc: null
+ value: <HUB_TOKEN>
+ id2label:
+ desc: null
+ value:
+ '0': LABEL_0
+ '1': LABEL_1
+ ignore_data_skip:
+ desc: null
+ value: false
+ include_inputs_for_metrics:
+ desc: null
+ value: false
+ is_decoder:
+ desc: null
+ value: false
+ is_encoder_decoder:
+ desc: null
+ value: true
+ label2id:
+ desc: null
+ value:
+ LABEL_0: 0
+ LABEL_1: 1
+ label_names:
+ desc: null
+ value: None
+ label_smoothing_factor:
+ desc: null
+ value: 0.0
+ language:
+ desc: null
+ value: fr.en
+ learning_rate:
+ desc: null
+ value: 0.0002757119755681108
+ length_column_name:
+ desc: null
+ value: length
+ length_penalty:
+ desc: null
+ value: 1.0
+ load_best_model_at_end:
+ desc: null
+ value: true
+ local_rank:
+ desc: null
+ value: -1
+ log_level:
+ desc: null
+ value: -1
+ log_level_replica:
+ desc: null
+ value: -1
+ log_on_each_node:
+ desc: null
+ value: true
+ logging_dir:
+ desc: null
+ value: ./runs/May05_16-32-27_sanchit--v100
+ logging_first_step:
+ desc: null
+ value: false
+ logging_nan_inf_filter:
+ desc: null
+ value: true
+ logging_steps:
+ desc: null
+ value: 1
+ logging_strategy:
+ desc: null
+ value: steps
+ lr_scheduler_type:
+ desc: null
+ value: linear
+ max_duration_in_seconds:
+ desc: null
+ value: 20
+ max_grad_norm:
+ desc: null
+ value: 1.0
+ max_length:
+ desc: null
+ value: 40
+ max_steps:
+ desc: null
+ value: -1
+ metric_for_best_model:
+ desc: null
+ value: bleu
+ min_length:
+ desc: null
+ value: 0
+ model_name_or_path:
+ desc: null
+ value: ./
+ model_type:
+ desc: null
+ value: speech-encoder-decoder
+ mp_parameters:
+ desc: null
+ value: ''
+ no_cuda:
+ desc: null
+ value: false
+ no_repeat_ngram_size:
+ desc: null
+ value: 0
+ num_beam_groups:
+ desc: null
+ value: 1
+ num_beams:
+ desc: null
+ value: 1
+ num_return_sequences:
+ desc: null
+ value: 1
+ num_train_epochs:
+ desc: null
+ value: 3
+ optim:
+ desc: null
+ value: adamw_hf
+ output_attentions:
+ desc: null
+ value: false
+ output_dir:
+ desc: null
+ value: ./
+ output_hidden_states:
+ desc: null
+ value: false
+ output_scores:
+ desc: null
+ value: false
+ overwrite_output_dir:
+ desc: null
+ value: true
+ pad_token_id:
+ desc: null
+ value: 1
+ past_index:
+ desc: null
+ value: -1
+ per_device_eval_batch_size:
+ desc: null
+ value: 8
+ per_device_train_batch_size:
+ desc: null
+ value: 8
+ per_gpu_eval_batch_size:
+ desc: null
+ value: None
+ per_gpu_train_batch_size:
+ desc: null
+ value: None
+ predict_with_generate:
+ desc: null
+ value: true
+ prediction_loss_only:
+ desc: null
+ value: false
+ prefix:
+ desc: null
+ value: null
+ problem_type:
+ desc: null
+ value: null
+ processor_class:
+ desc: null
+ value: Wav2Vec2Processor
+ pruned_heads:
+ desc: null
+ value: {}
+ push_to_hub:
+ desc: null
+ value: true
+ push_to_hub_model_id:
+ desc: null
+ value: None
+ push_to_hub_organization:
+ desc: null
+ value: None
+ push_to_hub_token:
+ desc: null
+ value: <PUSH_TO_HUB_TOKEN>
+ remove_invalid_values:
+ desc: null
+ value: false
+ remove_unused_columns:
+ desc: null
+ value: true
+ repetition_penalty:
+ desc: null
+ value: 1.0
+ report_to:
+ desc: null
+ value: '[''tensorboard'', ''wandb'', ''codecarbon'']'
+ resume_from_checkpoint:
+ desc: null
+ value: None
+ return_dict:
+ desc: null
+ value: true
+ return_dict_in_generate:
+ desc: null
+ value: false
+ run_name:
+ desc: null
+ value: ./
+ save_on_each_node:
+ desc: null
+ value: false
+ save_steps:
+ desc: null
+ value: 500
+ save_strategy:
+ desc: null
+ value: steps
+ save_total_limit:
+ desc: null
+ value: None
+ seed:
+ desc: null
+ value: 42
+ sep_token_id:
+ desc: null
+ value: null
+ sharded_ddp:
+ desc: null
+ value: '[]'
+ skip_memory_metrics:
+ desc: null
+ value: true
+ sortish_sampler:
+ desc: null
+ value: false
+ task:
+ desc: null
+ value: covost2
+ task_specific_params:
+ desc: null
+ value: null
+ temperature:
+ desc: null
+ value: 1.0
+ tf32:
+ desc: null
+ value: None
+ tie_encoder_decoder:
+ desc: null
+ value: false
+ tie_word_embeddings:
+ desc: null
+ value: false
+ tokenizer_class:
+ desc: null
+ value: null
+ top_k:
+ desc: null
+ value: 50
+ top_p:
+ desc: null
+ value: 1.0
+ torch_dtype:
+ desc: null
+ value: torch.float32
+ torchscript:
+ desc: null
+ value: false
+ tpu_metrics_debug:
+ desc: null
+ value: false
+ tpu_num_cores:
+ desc: null
+ value: None
+ train_batch_size:
+ desc: null
+ value: 8
+ transformers_version:
+ desc: null
+ value: null
+ typical_p:
+ desc: null
+ value: 1.0
+ use_bfloat16:
+ desc: null
+ value: false
+ use_cache:
+ desc: null
+ value: false
+ use_legacy_prediction_loop:
+ desc: null
+ value: false
+ warmup_ratio:
+ desc: null
+ value: 0.0
+ warmup_steps:
+ desc: null
+ value: 500
+ weight_decay:
+ desc: null
+ value: 0.0
+ xpu_backend:
+ desc: null
+ value: None
wandb/run-20220505_163723-urfp82ib/files/output.log ADDED
@@ -0,0 +1,208 @@
+ wandb: WARNING Config item 'output_dir' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'evaluation_strategy' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'per_device_train_batch_size' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'per_device_eval_batch_size' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'gradient_accumulation_steps' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'learning_rate' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'num_train_epochs' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'warmup_steps' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'logging_steps' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'save_steps' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'eval_steps' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'metric_for_best_model' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'greater_is_better' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'generation_max_length' was locked by 'sweep' (ignored update).
+ wandb: WARNING Config item 'generation_num_beams' was locked by 'sweep' (ignored update).
+ 0%| | 0/9720 [00:00<?, ?it/s]
+ Training completed. Do not forget to share your model on huggingface.co/models =)
+ Loading best model from ./checkpoint-1000 (score: 2.4961869532998874e-13).
+ {'train_runtime': 15.9016, 'train_samples_per_second': 39123.207, 'train_steps_per_second': 611.259, 'train_loss': 0.0, 'epoch': 3.0}
+ 0%| | 0/9720 [00:10<?, ?it/s]
+ Skipping the first batches: : 0it [00:15, ?it/s] | 0/9720 [00:10<?, ?it/s]
+ Saving model checkpoint to ./
+ Configuration saved in ./config.json
+ Model weights saved in ./pytorch_model.bin
+ Feature extractor saved in ./preprocessor_config.json
+ Saving model checkpoint to ./
+ Configuration saved in ./config.json
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Model weights saved in ./pytorch_model.bin
+ Feature extractor saved in ./preprocessor_config.json
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
139
+ To disable this warning, you can either:
140
+ - Avoid using `tokenizers` before the fork if possible
141
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
142
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
143
+ To disable this warning, you can either:
144
+ - Avoid using `tokenizers` before the fork if possible
145
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
146
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
147
+ To disable this warning, you can either:
148
+ - Avoid using `tokenizers` before the fork if possible
149
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
150
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
151
+ To disable this warning, you can either:
152
+ - Avoid using `tokenizers` before the fork if possible
153
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
154
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
155
+ To disable this warning, you can either:
156
+ - Avoid using `tokenizers` before the fork if possible
157
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
158
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
159
+ To disable this warning, you can either:
160
+ - Avoid using `tokenizers` before the fork if possible
161
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
162
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
163
+ To disable this warning, you can either:
164
+ - Avoid using `tokenizers` before the fork if possible
165
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
166
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
167
+ To disable this warning, you can either:
168
+ - Avoid using `tokenizers` before the fork if possible
169
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
170
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
171
+ To disable this warning, you can either:
172
+ - Avoid using `tokenizers` before the fork if possible
173
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
174
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
175
+ To disable this warning, you can either:
176
+ - Avoid using `tokenizers` before the fork if possible
177
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
178
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
179
+ To disable this warning, you can either:
180
+ - Avoid using `tokenizers` before the fork if possible
181
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
182
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
183
+ To disable this warning, you can either:
184
+ - Avoid using `tokenizers` before the fork if possible
185
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
186
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
187
+ To disable this warning, you can either:
188
+ - Avoid using `tokenizers` before the fork if possible
189
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
190
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
191
+ To disable this warning, you can either:
192
+ - Avoid using `tokenizers` before the fork if possible
193
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
194
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
195
+ To disable this warning, you can either:
196
+ - Avoid using `tokenizers` before the fork if possible
197
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
198
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
199
+ To disable this warning, you can either:
200
+ - Avoid using `tokenizers` before the fork if possible
201
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
202
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
203
+ To disable this warning, you can either:
204
+ - Avoid using `tokenizers` before the fork if possible
205
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
206
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
207
+ To disable this warning, you can either:
208
+ - Avoid using `tokenizers` before the fork if possible
wandb/run-20220505_163723-urfp82ib/files/requirements.txt ADDED
@@ -0,0 +1,287 @@
1
+ absl-py==1.0.0
2
+ aiohttp==3.8.1
3
+ aiosignal==1.2.0
4
+ alembic==1.7.7
5
+ anyio==3.5.0
6
+ appdirs==1.4.4
7
+ apscheduler==3.9.1
8
+ argon2-cffi-bindings==21.2.0
9
+ argon2-cffi==21.3.0
10
+ arrow==1.2.2
11
+ asttokens==2.0.5
12
+ astunparse==1.6.3
13
+ async-timeout==4.0.2
14
+ attrs==21.4.0
15
+ audioread==2.1.9
16
+ autopage==0.5.0
17
+ babel==2.9.1
18
+ backcall==0.2.0
19
+ backoff==1.11.1
20
+ binaryornot==0.4.4
21
+ bitsandbytes-cuda113==0.26.0
22
+ black==22.1.0
23
+ bleach==4.1.0
24
+ boto3==1.16.34
25
+ botocore==1.19.63
26
+ brotli==1.0.9
27
+ cachetools==5.0.0
28
+ certifi==2021.10.8
29
+ cffi==1.15.0
30
+ chardet==4.0.0
31
+ charset-normalizer==2.0.11
32
+ chex==0.1.0
33
+ click==8.0.3
34
+ cliff==3.10.1
35
+ clldutils==3.10.1
36
+ cmaes==0.8.2
37
+ cmd2==2.4.0
38
+ codecarbon==1.2.0
39
+ colorlog==6.6.0
40
+ cookiecutter==1.7.3
41
+ cryptography==36.0.2
42
+ csvw==1.11.0
43
+ cycler==0.11.0
44
+ dash-bootstrap-components==1.1.0
45
+ dash-core-components==2.0.0
46
+ dash-html-components==2.0.0
47
+ dash-table==5.0.0
48
+ dash==2.3.1
49
+ datasets==2.1.1.dev0
50
+ debugpy==1.5.1
51
+ decorator==5.1.1
52
+ defusedxml==0.7.1
53
+ deprecated==1.2.13
54
+ dill==0.3.4
55
+ dlinfo==1.2.1
56
+ dm-tree==0.1.6
57
+ docker-pycreds==0.4.0
58
+ docker==4.4.4
59
+ entrypoints==0.4
60
+ execnet==1.9.0
61
+ executing==0.8.2
62
+ faiss-cpu==1.7.2
63
+ filelock==3.4.2
64
+ fire==0.4.0
65
+ flake8==4.0.1
66
+ flask-compress==1.11
67
+ flask==2.1.1
68
+ flatbuffers==1.12
69
+ flax==0.4.0
70
+ fonttools==4.29.1
71
+ frozenlist==1.3.0
72
+ fsspec==2022.1.0
73
+ fugashi==1.1.2
74
+ gast==0.5.3
75
+ gitdb==4.0.9
76
+ gitpython==3.1.18
77
+ google-auth-oauthlib==0.4.6
78
+ google-auth==2.6.0
79
+ google-pasta==0.2.0
80
+ greenlet==1.1.2
81
+ grpcio==1.43.0
82
+ h5py==3.6.0
83
+ hf-doc-builder==0.2.0
84
+ huggingface-hub==0.4.0
85
+ hypothesis==6.36.1
86
+ idna==3.3
87
+ importlib-metadata==4.10.1
88
+ iniconfig==1.1.1
89
+ ipadic==1.0.0
90
+ ipdb==0.13.9
91
+ ipykernel==6.8.0
92
+ ipython-genutils==0.2.0
93
+ ipython==8.0.1
94
+ ipywidgets==7.6.5
95
+ isodate==0.6.1
96
+ isort==5.10.1
97
+ itsdangerous==2.1.2
98
+ jax==0.2.28
99
+ jaxlib==0.1.76+cuda11.cudnn82
100
+ jedi==0.18.1
101
+ jinja2-time==0.2.0
102
+ jinja2==3.0.3
103
+ jiwer==2.3.0
104
+ jmespath==0.10.0
105
+ joblib==1.1.0
106
+ json5==0.9.6
107
+ jsonschema==4.4.0
108
+ jupyter-client==7.1.2
109
+ jupyter-console==6.4.0
110
+ jupyter-core==4.9.1
111
+ jupyter-server==1.13.5
112
+ jupyter==1.0.0
113
+ jupyterlab-pygments==0.1.2
114
+ jupyterlab-server==2.10.3
115
+ jupyterlab-widgets==1.0.2
116
+ jupyterlab==3.2.9
117
+ keras-preprocessing==1.1.2
118
+ keras==2.8.0
119
+ kiwisolver==1.3.2
120
+ kubernetes==12.0.1
121
+ libclang==13.0.0
122
+ librosa==0.8.1
123
+ llvmlite==0.38.0
124
+ mako==1.2.0
125
+ markdown==3.3.6
126
+ markupsafe==2.0.1
127
+ matplotlib-inline==0.1.3
128
+ matplotlib==3.5.1
129
+ mccabe==0.6.1
130
+ mistune==0.8.4
131
+ msgpack==1.0.3
132
+ multidict==6.0.2
133
+ multiprocess==0.70.12.2
134
+ mypy-extensions==0.4.3
135
+ nbclassic==0.3.5
136
+ nbclient==0.5.10
137
+ nbconvert==6.4.1
138
+ nbformat==5.1.3
139
+ nest-asyncio==1.5.4
140
+ nltk==3.7
141
+ notebook==6.4.8
142
+ numba==0.55.1
143
+ numpy==1.21.5
144
+ oauthlib==3.2.0
145
+ onnx==1.11.0
146
+ onnxconverter-common==1.9.0
147
+ opt-einsum==3.3.0
148
+ optax==0.1.0
149
+ optuna==2.10.0
150
+ packaging==21.3
151
+ pandas==1.4.0
152
+ pandocfilters==1.5.0
153
+ parameterized==0.8.1
154
+ parso==0.8.3
155
+ pathspec==0.9.0
156
+ pathtools==0.1.2
157
+ pbr==5.8.1
158
+ pexpect==4.8.0
159
+ phonemizer==3.0.1
160
+ pickleshare==0.7.5
161
+ pillow==9.0.0
162
+ pint==0.16.1
163
+ pip==22.0.2
164
+ pkg-resources==0.0.0
165
+ plac==1.3.5
166
+ platformdirs==2.4.1
167
+ plotly==5.6.0
168
+ pluggy==1.0.0
169
+ pooch==1.6.0
170
+ portalocker==2.0.0
171
+ poyo==0.5.0
172
+ prettytable==3.2.0
173
+ prometheus-client==0.13.1
174
+ promise==2.3
175
+ prompt-toolkit==3.0.26
176
+ protobuf==3.19.4
177
+ psutil==5.9.0
178
+ ptyprocess==0.7.0
179
+ pure-eval==0.2.2
180
+ py-cpuinfo==8.0.0
181
+ py==1.11.0
182
+ pyarrow==6.0.1
183
+ pyasn1-modules==0.2.8
184
+ pyasn1==0.4.8
185
+ pycodestyle==2.8.0
186
+ pycparser==2.21
187
+ pyctcdecode==0.3.0
188
+ pyflakes==2.4.0
189
+ pygments==2.11.2
190
+ pygtrie==2.4.2
191
+ pynvml==11.4.1
192
+ pyopenssl==22.0.0
193
+ pyparsing==3.0.7
194
+ pyperclip==1.8.2
195
+ pypng==0.0.21
196
+ pyrsistent==0.18.1
197
+ pytest-forked==1.4.0
198
+ pytest-timeout==2.1.0
199
+ pytest-xdist==2.5.0
200
+ pytest==7.1.1
201
+ python-dateutil==2.8.2
202
+ python-levenshtein==0.12.2
203
+ python-slugify==6.1.1
204
+ pytz-deprecation-shim==0.1.0.post0
205
+ pytz==2021.3
206
+ pyyaml==5.4.1
207
+ pyzmq==22.3.0
208
+ qtconsole==5.2.2
209
+ qtpy==2.0.1
210
+ ray==1.11.0
211
+ redis==4.2.2
212
+ regex==2022.1.18
213
+ requests-oauthlib==1.3.1
214
+ requests==2.27.1
215
+ resampy==0.2.2
216
+ responses==0.18.0
217
+ rfc3986==2.0.0
218
+ rouge-score==0.0.4
219
+ rsa==4.8
220
+ s3transfer==0.3.7
221
+ sacrebleu==1.5.1
222
+ sacremoses==0.0.47
223
+ scikit-learn==1.0.2
224
+ scipy==1.7.3
225
+ segments==2.2.0
226
+ send2trash==1.8.0
227
+ sentencepiece==0.1.96
228
+ sentry-sdk==1.5.6
229
+ setuptools==44.1.1
230
+ shortuuid==1.0.8
231
+ sigopt==8.3.0
232
+ six==1.16.0
233
+ smmap==5.0.0
234
+ sniffio==1.2.0
235
+ sortedcontainers==2.4.0
236
+ soundfile==0.10.3.post1
237
+ sqlalchemy==1.4.34
238
+ stack-data==0.1.4
239
+ stevedore==3.5.0
240
+ tabulate==0.8.9
241
+ tenacity==8.0.1
242
+ tensorboard-data-server==0.6.1
243
+ tensorboard-plugin-wit==1.8.1
244
+ tensorboard==2.8.0
245
+ tensorboardx==2.5
246
+ tensorflow-io-gcs-filesystem==0.24.0
247
+ tensorflow==2.8.0
248
+ termcolor==1.1.0
249
+ terminado==0.13.1
250
+ testpath==0.5.0
251
+ text-unidecode==1.3
252
+ tf-estimator-nightly==2.8.0.dev2021122109
253
+ tf2onnx==1.9.3
254
+ threadpoolctl==3.1.0
255
+ timeout-decorator==0.5.0
256
+ timm==0.5.4
257
+ tokenizers==0.11.4
258
+ toml==0.10.2
259
+ tomli==2.0.0
260
+ toolz==0.11.2
261
+ torch==1.10.2+cu113
262
+ torchaudio==0.10.2+cu113
263
+ torchvision==0.11.3
264
+ tornado==6.1
265
+ tqdm==4.62.3
266
+ traitlets==5.1.1
267
+ transformers==4.18.0.dev0
268
+ typing-extensions==3.10.0.2
269
+ tzdata==2022.1
270
+ tzlocal==4.2
271
+ unidic-lite==1.0.8
272
+ unidic==1.1.0
273
+ uritemplate==4.1.1
274
+ urllib3==1.26.8
275
+ wandb==0.12.10
276
+ wasabi==0.9.1
277
+ wcwidth==0.2.5
278
+ webencodings==0.5.1
279
+ websocket-client==1.2.3
280
+ werkzeug==2.0.2
281
+ wheel==0.37.1
282
+ widgetsnbextension==3.5.2
283
+ wrapt==1.14.0
284
+ xxhash==2.0.2
285
+ yarl==1.7.2
286
+ yaspin==2.1.0
287
+ zipp==3.7.0
wandb/run-20220505_163723-urfp82ib/files/wandb-metadata.json ADDED
@@ -0,0 +1,57 @@
1
+ {
2
+ "os": "Linux-5.11.0-1028-gcp-x86_64-with-glibc2.33",
3
+ "python": "3.9.5",
4
+ "heartbeatAt": "2022-05-05T16:37:27.757365",
5
+ "startedAt": "2022-05-05T16:37:23.056856",
6
+ "docker": null,
7
+ "gpu": "Tesla V100-SXM2-16GB",
8
+ "gpu_count": 2,
9
+ "cpu_count": 16,
10
+ "cuda": null,
11
+ "args": [
12
+ "--overwrite_output_dir",
13
+ "--freeze_feature_encoder",
14
+ "--gradient_checkpointing",
15
+ "--predict_with_generate",
16
+ "--fp16",
17
+ "--group_by_length",
18
+ "--do_train",
19
+ "--do_eval",
20
+ "--load_best_model_at_end",
21
+ "--push_to_hub",
22
+ "--use_auth_token",
23
+ "--eval_split_name=test",
24
+ "--eval_steps=500",
25
+ "--evaluation_strategy=steps",
26
+ "--generation_max_length=40",
27
+ "--generation_num_beams=1",
28
+ "--gradient_accumulation_steps=8",
29
+ "--greater_is_better=True",
30
+ "--hidden_dropout=0.18004101365999406",
31
+ "--language=fr.en",
32
+ "--learning_rate=0.0002757119755681108",
33
+ "--logging_steps=1",
34
+ "--max_duration_in_seconds=20",
35
+ "--metric_for_best_model=bleu",
36
+ "--model_name_or_path=./",
37
+ "--num_train_epochs=3",
38
+ "--output_dir=./",
39
+ "--per_device_eval_batch_size=8",
40
+ "--per_device_train_batch_size=8",
41
+ "--save_steps=500",
42
+ "--task=covost2",
43
+ "--warmup_steps=500"
44
+ ],
45
+ "state": "running",
46
+ "program": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py",
47
+ "codePath": "run_xtreme_s.py",
48
+ "git": {
49
+ "remote": "https://huggingface.co/sanchit-gandhi/xtreme_s_xlsr_2_bart_covost2_fr_en",
50
+ "commit": "0bc8bdedf4de182e8496246a252ccdd3add039e2"
51
+ },
52
+ "email": "sanchit@huggingface.co",
53
+ "root": "/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en",
54
+ "host": "sanchit--v100",
55
+ "username": "sanchit_huggingface_co",
56
+ "executable": "/home/sanchit_huggingface_co/gcp/bin/python3"
57
+ }
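The "args" array above is the exact command line that reached run_xtreme_s.py. Scripts in this family conventionally split such flags across dataclasses via transformers' HfArgumentParser; a minimal sketch of that pattern, where DataArguments is a hypothetical, trimmed-down stand-in for the script's real argument classes:

from dataclasses import dataclass, field

from transformers import HfArgumentParser, Seq2SeqTrainingArguments

@dataclass
class DataArguments:
    # Hypothetical subset of the script's real data/model arguments.
    task: str = field(default="covost2")
    language: str = field(default="fr.en")
    eval_split_name: str = field(default="test")
    max_duration_in_seconds: float = field(default=20.0)

parser = HfArgumentParser((DataArguments, Seq2SeqTrainingArguments))
data_args, training_args = parser.parse_args_into_dataclasses()
# training_args now carries --learning_rate, --save_steps, --fp16, etc.;
# flags like --hidden_dropout would be fields of the script's own dataclasses.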
wandb/run-20220505_163723-urfp82ib/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/train_runtime": 15.9016, "train/train_samples_per_second": 39123.207, "train/train_steps_per_second": 611.259, "train/total_flos": 0.0, "train/train_loss": 0.0, "train/epoch": 3.0, "train/global_step": 9720, "_runtime": 15, "_timestamp": 1651768658, "_step": 0}
wandb/run-20220505_163723-urfp82ib/logs/debug-internal.log ADDED
@@ -0,0 +1,63 @@
1
+ 2022-05-05 16:37:24,392 INFO MainThread:65310 [internal.py:wandb_internal():89] W&B internal server running at pid: 65310, started at: 2022-05-05 16:37:24.391844
2
+ 2022-05-05 16:37:24,395 DEBUG SenderThread:65310 [sender.py:send():235] send: header
3
+ 2022-05-05 16:37:24,395 INFO WriterThread:65310 [datastore.py:open_for_write():77] open: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/run-urfp82ib.wandb
4
+ 2022-05-05 16:37:24,395 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: check_version
5
+ 2022-05-05 16:37:24,396 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: check_version
6
+ 2022-05-05 16:37:24,469 DEBUG SenderThread:65310 [sender.py:send():235] send: run
7
+ 2022-05-05 16:37:24,560 INFO SenderThread:65310 [dir_watcher.py:__init__():169] watching files in: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files
8
+ 2022-05-05 16:37:24,561 INFO SenderThread:65310 [sender.py:_start_run_threads():809] run started: urfp82ib with start time 1651768643
9
+ 2022-05-05 16:37:24,561 DEBUG SenderThread:65310 [sender.py:send():235] send: summary
10
+ 2022-05-05 16:37:24,561 INFO SenderThread:65310 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
11
+ 2022-05-05 16:37:24,562 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: run_start
12
+ 2022-05-05 16:37:25,562 INFO Thread-8 :65310 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/wandb-summary.json
13
+ 2022-05-05 16:37:27,757 DEBUG HandlerThread:65310 [meta.py:__init__():36] meta init
14
+ 2022-05-05 16:37:27,757 DEBUG HandlerThread:65310 [meta.py:__init__():50] meta init done
15
+ 2022-05-05 16:37:27,757 DEBUG HandlerThread:65310 [meta.py:probe():210] probe
16
+ 2022-05-05 16:37:27,763 DEBUG HandlerThread:65310 [meta.py:_setup_git():200] setup git
17
+ 2022-05-05 16:37:27,796 DEBUG HandlerThread:65310 [meta.py:_setup_git():207] setup git done
18
+ 2022-05-05 16:37:27,797 DEBUG HandlerThread:65310 [meta.py:_save_pip():54] save pip
19
+ 2022-05-05 16:37:27,797 DEBUG HandlerThread:65310 [meta.py:_save_pip():68] save pip done
20
+ 2022-05-05 16:37:27,798 DEBUG HandlerThread:65310 [meta.py:probe():248] probe done
21
+ 2022-05-05 16:37:27,803 DEBUG SenderThread:65310 [sender.py:send():235] send: files
22
+ 2022-05-05 16:37:27,804 INFO SenderThread:65310 [sender.py:_save_file():944] saving file wandb-metadata.json with policy now
23
+ 2022-05-05 16:37:27,810 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: stop_status
24
+ 2022-05-05 16:37:27,810 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: stop_status
25
+ 2022-05-05 16:37:27,844 DEBUG SenderThread:65310 [sender.py:send():235] send: config
26
+ 2022-05-05 16:37:27,845 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
27
+ 2022-05-05 16:37:27,845 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
28
+ 2022-05-05 16:37:27,845 WARNING SenderThread:65310 [sender.py:send_metric():902] Seen metric with glob (shouldnt happen)
29
+ 2022-05-05 16:37:28,059 INFO Thread-11 :65310 [upload_job.py:push():137] Uploaded file /tmp/tmp1h8vnyd5wandb/1lwnw59n-wandb-metadata.json
30
+ 2022-05-05 16:37:28,600 INFO Thread-8 :65310 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/wandb-metadata.json
31
+ 2022-05-05 16:37:28,600 INFO Thread-8 :65310 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/requirements.txt
32
+ 2022-05-05 16:37:28,601 INFO Thread-8 :65310 [dir_watcher.py:_on_file_created():217] file/dir created: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
33
+ 2022-05-05 16:37:30,601 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
34
+ 2022-05-05 16:37:32,602 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
35
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
36
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
37
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
38
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
39
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
40
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: metric
41
+ 2022-05-05 16:37:38,641 DEBUG SenderThread:65310 [sender.py:send():235] send: history
42
+ 2022-05-05 16:37:38,642 DEBUG SenderThread:65310 [sender.py:send():235] send: summary
43
+ 2022-05-05 16:37:38,643 INFO SenderThread:65310 [sender.py:_save_file():944] saving file wandb-summary.json with policy end
44
+ 2022-05-05 16:37:39,604 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/wandb-summary.json
45
+ 2022-05-05 16:37:40,605 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
46
+ 2022-05-05 16:37:42,856 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: stop_status
47
+ 2022-05-05 16:37:42,856 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: stop_status
48
+ 2022-05-05 16:37:52,609 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
49
+ 2022-05-05 16:37:55,611 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/config.yaml
50
+ 2022-05-05 16:37:56,176 DEBUG SenderThread:65310 [sender.py:send():235] send: stats
51
+ 2022-05-05 16:37:57,896 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: stop_status
52
+ 2022-05-05 16:37:57,896 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: stop_status
53
+ 2022-05-05 16:38:04,614 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
54
+ 2022-05-05 16:38:12,927 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: stop_status
55
+ 2022-05-05 16:38:12,928 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: stop_status
56
+ 2022-05-05 16:38:22,621 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
57
+ 2022-05-05 16:38:24,622 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
58
+ 2022-05-05 16:38:26,570 DEBUG SenderThread:65310 [sender.py:send():235] send: stats
59
+ 2022-05-05 16:38:26,622 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
60
+ 2022-05-05 16:38:28,216 DEBUG HandlerThread:65310 [handler.py:handle_request():131] handle_request: stop_status
61
+ 2022-05-05 16:38:28,216 DEBUG SenderThread:65310 [sender.py:send_request():249] send_request: stop_status
62
+ 2022-05-05 16:38:28,624 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
63
+ 2022-05-05 16:38:30,624 INFO Thread-8 :65310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/files/output.log
wandb/run-20220505_163723-urfp82ib/logs/debug.log ADDED
@@ -0,0 +1,28 @@
1
+ 2022-05-05 16:37:23,058 INFO MainThread:65172 [wandb_setup.py:_flush():75] Loading settings from /home/sanchit_huggingface_co/.config/wandb/settings
2
+ 2022-05-05 16:37:23,058 INFO MainThread:65172 [wandb_setup.py:_flush():75] Loading settings from wandb/settings
3
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_setup.py:_flush():75] Loading settings from environment variables: {'entity': 'sanchit-gandhi', 'project': 'xtreme_s_xlsr_2_bart_covost2_fr_en', 'sweep_id': 'pvyx3mpp', 'root_dir': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en', 'run_id': 'urfp82ib', 'sweep_param_path': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/sweep-pvyx3mpp/config-urfp82ib.yaml'}
4
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_setup.py:_flush():75] Inferring run settings from compute environment: {'program_relpath': 'run_xtreme_s.py', 'program': '/home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/run_xtreme_s.py'}
5
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_init.py:_log_setup():386] Logging user logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/logs/debug.log
6
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_init.py:_log_setup():387] Logging internal logs to /home/sanchit_huggingface_co/xtreme_s_xlsr_2_bart_covost2_fr_en/wandb/run-20220505_163723-urfp82ib/logs/debug-internal.log
7
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_init.py:init():420] calling init triggers
8
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_init.py:init():425] wandb.init called with sweep_config: {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.18004101365999406, 'language': 'fr.en', 'learning_rate': 0.0002757119755681108, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500}
9
+ config: {}
10
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [wandb_init.py:init():471] starting backend
11
+ 2022-05-05 16:37:23,059 INFO MainThread:65172 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
12
+ 2022-05-05 16:37:23,142 INFO MainThread:65172 [backend.py:ensure_launched():219] starting backend process...
13
+ 2022-05-05 16:37:23,224 INFO MainThread:65172 [backend.py:ensure_launched():224] started backend process with pid: 65310
14
+ 2022-05-05 16:37:23,226 INFO MainThread:65172 [wandb_init.py:init():480] backend started and connected
15
+ 2022-05-05 16:37:23,229 INFO MainThread:65172 [wandb_run.py:_config_callback():966] config_cb None None {'eval_split_name': 'test', 'eval_steps': 500, 'evaluation_strategy': 'steps', 'generation_max_length': 40, 'generation_num_beams': 1, 'gradient_accumulation_steps': 8, 'greater_is_better': True, 'hidden_dropout': 0.18004101365999406, 'language': 'fr.en', 'learning_rate': 0.0002757119755681108, 'logging_steps': 1, 'max_duration_in_seconds': 20, 'metric_for_best_model': 'bleu', 'model_name_or_path': './', 'num_train_epochs': 3, 'output_dir': './', 'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'save_steps': 500, 'task': 'covost2', 'warmup_steps': 500}
16
+ 2022-05-05 16:37:23,244 INFO MainThread:65172 [wandb_init.py:init():550] updated telemetry
17
+ 2022-05-05 16:37:23,438 INFO MainThread:65172 [wandb_init.py:init():581] communicating current version
18
+ 2022-05-05 16:37:24,467 INFO MainThread:65172 [wandb_init.py:init():586] got version response upgrade_message: "wandb version 0.12.16 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
19
+
20
+ 2022-05-05 16:37:24,467 INFO MainThread:65172 [wandb_init.py:init():596] communicating run to backend with 30 second timeout
21
+ 2022-05-05 16:37:24,561 INFO MainThread:65172 [wandb_init.py:init():624] starting run threads in backend
22
+ 2022-05-05 16:37:27,809 INFO MainThread:65172 [wandb_run.py:_console_start():1827] atexit reg
23
+ 2022-05-05 16:37:27,810 INFO MainThread:65172 [wandb_run.py:_redirect():1701] redirect: SettingsConsole.REDIRECT
24
+ 2022-05-05 16:37:27,811 INFO MainThread:65172 [wandb_run.py:_redirect():1706] Redirecting console.
25
+ 2022-05-05 16:37:27,812 INFO MainThread:65172 [wandb_run.py:_redirect():1762] Redirects installed.
26
+ 2022-05-05 16:37:27,812 INFO MainThread:65172 [wandb_init.py:init():651] run started, returning control to user process
27
+ 2022-05-05 16:37:27,815 INFO MainThread:65172 [wandb_run.py:_config_callback():966] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'torch.float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 40, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['SpeechEncoderDecoderModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': None, 'problem_type': None, '_name_or_path': './', 'transformers_version': None, 'decoder': {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim': 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.1, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': True, 'cross_attention_hidden_size': None, 'add_cross_attention': True, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': True, 'num_beams': 4, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['BartModel'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': 2, 'task_specific_params': {'summarization': {'length_penalty': 1.0, 'max_length': 128, 'min_length': 12, 'num_beams': 4}, 'summarization_cnn': {'length_penalty': 2.0, 'max_length': 142, 'min_length': 56, 'num_beams': 4}, 'summarization_xsum': {'length_penalty': 1.0, 'max_length': 62, 'min_length': 11, 'num_beams': 6}}, 'problem_type': None, 
'_name_or_path': 'facebook/bart-large', 'transformers_version': '4.19.0.dev0', 'add_bias_logits': False, 'add_final_layer_norm': False, 'classif_dropout': 0.1, 'gradient_checkpointing': False, 'normalize_before': False, 'model_type': 'bart'}, 'encoder': {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'architectures': ['Wav2Vec2ForPreTraining'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/wav2vec2-xls-r-300m', 'transformers_version': '4.19.0.dev0', 'feat_extract_dropout': 0.0, 'gradient_checkpointing': False, 'num_feat_extract_layers': 7, 'hidden_size': 1024, 'feat_extract_norm': 'layer', 'feat_extract_activation': 'gelu', 'conv_dim': [512, 512, 512, 512, 512, 512, 512], 'conv_stride': [5, 2, 2, 2, 2, 2, 2], 'conv_kernel': [10, 3, 3, 3, 3, 2, 2], 'conv_bias': True, 'num_conv_pos_embeddings': 128, 'num_conv_pos_embedding_groups': 16, 'num_hidden_layers': 24, 'intermediate_size': 4096, 'hidden_act': 'gelu', 'num_attention_heads': 16, 'hidden_dropout': 0.18004101365999406, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'feat_proj_dropout': 0.0, 'final_dropout': 0.0, 'layerdrop': 0.0, 'layer_norm_eps': 1e-05, 'initializer_range': 0.02, 'vocab_size': 32, 'do_stable_layer_norm': True, 'use_weighted_layer_sum': False, 'apply_spec_augment': True, 'mask_time_prob': 0.1, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'num_codevectors_per_group': 320, 'num_codevector_groups': 2, 'contrastive_logits_temperature': 0.1, 'feat_quantizer_dropout': 0.0, 'num_negatives': 100, 'codevector_dim': 768, 'proj_codevector_dim': 768, 'diversity_loss_weight': 0.1, 'ctc_loss_reduction': 'sum', 'ctc_zero_infinity': False, 'add_adapter': True, 'adapter_kernel_size': 3, 'adapter_stride': 2, 'num_adapter_layers': 3, 'output_hidden_size': 1024, 'classifier_proj_size': 256, 'tdnn_dim': [512, 512, 512, 512, 1500], 'tdnn_kernel': [5, 3, 3, 1, 1], 'tdnn_dilation': [1, 2, 3, 1, 1], 'xvector_output_dim': 512, 'model_type': 'wav2vec2'}, 'model_type': 'speech-encoder-decoder', 'processor_class': 'Wav2Vec2Processor', 'use_cache': False, 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'prediction_loss_only': False, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'eval_accumulation_steps': 'None', 'eval_delay': 0, 
'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.0, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/May05_16-32-27_sanchit--v100', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_total_limit': 'None', 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'data_seed': 'None', 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'amp', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': -1, 'xpu_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': True, 'ignore_data_skip': False, 'sharded_ddp': '[]', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'adamw_hf', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['tensorboard', 'wandb', 'codecarbon']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': True, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', '_n_gpu': 1, 'mp_parameters': '', 'sortish_sampler': False, 'predict_with_generate': True, 'train_batch_size': 8, 'eval_batch_size': 8}
28
+ 2022-05-05 16:37:27,818 INFO MainThread:65172 [wandb_watch.py:watch():43] Watching
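The config_cb entries above show the swept hidden_dropout of 0.18004101365999406 sitting inside the wav2vec2 encoder half of the speech-encoder-decoder configuration. A minimal sketch of how such an override can be written back into the composite config (the exact mechanism inside run_xtreme_s.py is not visible in this log, so this is an assumption):

from transformers import SpeechEncoderDecoderConfig

# Load the composite config saved in this repo's working directory ("./").
config = SpeechEncoderDecoderConfig.from_pretrained("./")

# Swept value from wandb/sweep-pvyx3mpp/config-urfp82ib.yaml; the encoder
# sub-config is the wav2vec2 side, mirroring the edit to config.json.
config.encoder.hidden_dropout = 0.18004101365999406

config.save_pretrained("./")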
wandb/run-20220505_163723-urfp82ib/run-urfp82ib.wandb ADDED
Binary file (26.6 kB).
wandb/sweep-pvyx3mpp/config-urfp82ib.yaml ADDED
@@ -0,0 +1,44 @@
1
+ wandb_version: 1
2
+
3
+ eval_split_name:
4
+ value: test
5
+ eval_steps:
6
+ value: 500
7
+ evaluation_strategy:
8
+ value: steps
9
+ generation_max_length:
10
+ value: 40
11
+ generation_num_beams:
12
+ value: 1
13
+ gradient_accumulation_steps:
14
+ value: 8
15
+ greater_is_better:
16
+ value: true
17
+ hidden_dropout:
18
+ value: 0.18004101365999406
19
+ language:
20
+ value: fr.en
21
+ learning_rate:
22
+ value: 0.0002757119755681108
23
+ logging_steps:
24
+ value: 1
25
+ max_duration_in_seconds:
26
+ value: 20
27
+ metric_for_best_model:
28
+ value: bleu
29
+ model_name_or_path:
30
+ value: ./
31
+ num_train_epochs:
32
+ value: 3
33
+ output_dir:
34
+ value: ./
35
+ per_device_eval_batch_size:
36
+ value: 8
37
+ per_device_train_batch_size:
38
+ value: 8
39
+ save_steps:
40
+ value: 500
41
+ task:
42
+ value: covost2
43
+ warmup_steps:
44
+ value: 500
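A config-<run_id>.yaml like this one is written by the wandb sweep agent: the sweep controller samples one value per hyperparameter and hands the set to the run, which then forwards them as the CLI flags recorded in wandb-metadata.json. A minimal sketch of the agent side, where train() is a hypothetical stand-in for launching run_xtreme_s.py (the sweep id pvyx3mpp, entity and project name come from debug.log):

import wandb

def train():
    # Inside a sweep run, wandb.init() receives the sampled hyperparameters.
    run = wandb.init()
    hidden_dropout = run.config.hidden_dropout  # e.g. 0.18004101365999406
    learning_rate = run.config.learning_rate    # e.g. 0.0002757119755681108
    # ... launch the actual training with these values ...

# Attach an agent to the existing sweep.
wandb.agent(
    "pvyx3mpp",
    function=train,
    entity="sanchit-gandhi",
    project="xtreme_s_xlsr_2_bart_covost2_fr_en",
)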