wandb_version: 1 _n_gpu: desc: null value: 1 _name_or_path: desc: null value: ./ _wandb: desc: null value: cli_version: 0.12.10 framework: huggingface huggingface_version: 4.17.0.dev0 is_jupyter_run: false is_kaggle_kernel: false m: - 1: train/global_step 6: - 3 python_version: 3.9.5 start_time: 1646268136 t: 1: - 1 - 5 - 11 2: - 1 - 5 - 11 3: - 1 - 7 - 13 4: 3.9.5 5: 0.12.10 6: 4.17.0.dev0 8: - 5 adafactor: desc: null value: false adam_beta1: desc: null value: 0.9 adam_beta2: desc: null value: 0.999 adam_epsilon: desc: null value: 1.0e-08 add_cross_attention: desc: null value: false architectures: desc: null value: - SpeechEncoderDecoderModel bad_words_ids: desc: null value: null bf16: desc: null value: false bf16_full_eval: desc: null value: false bos_token_id: desc: null value: null chunk_size_feed_forward: desc: null value: 0 cross_attention_hidden_size: desc: null value: null dataloader_drop_last: desc: null value: false dataloader_num_workers: desc: null value: 0 dataloader_pin_memory: desc: null value: true ddp_bucket_cap_mb: desc: null value: None ddp_find_unused_parameters: desc: null value: None debug: desc: null value: '[]' decoder: desc: null value: _name_or_path: gpt2-medium activation_function: gelu_new add_cross_attention: true architectures: - GPT2LMHeadModel attn_pdrop: 0.0 bad_words_ids: null bos_token_id: 50256 chunk_size_feed_forward: 0 cross_attention_hidden_size: null decoder_start_token_id: null diversity_penalty: 0.0 do_sample: false early_stopping: false embd_pdrop: 0.0 encoder_no_repeat_ngram_size: 0 eos_token_id: 50256 finetuning_task: null forced_bos_token_id: null forced_eos_token_id: null id2label: '0': LABEL_0 '1': LABEL_1 initializer_range: 0.02 is_decoder: true is_encoder_decoder: false label2id: LABEL_0: 0 LABEL_1: 1 layer_norm_epsilon: 1.0e-05 length_penalty: 1.0 max_length: 20 min_length: 0 model_type: gpt2 n_ctx: 1024 n_embd: 1024 n_head: 16 n_inner: null n_layer: 24 n_positions: 1024 n_special: 0 no_repeat_ngram_size: 0 num_beam_groups: 1 num_beams: 1 num_return_sequences: 1 output_attentions: false output_hidden_states: false output_scores: false pad_token_id: null predict_special_tokens: true prefix: null problem_type: null pruned_heads: {} remove_invalid_values: false reorder_and_upcast_attn: false repetition_penalty: 1.0 resid_pdrop: 0.0 return_dict: true return_dict_in_generate: false scale_attn_by_inverse_layer_idx: false scale_attn_weights: true sep_token_id: null summary_activation: null summary_first_dropout: 0.0 summary_proj_to_labels: true summary_type: cls_index summary_use_proj: true task_specific_params: text-generation: do_sample: true max_length: 50 temperature: 1.0 tie_encoder_decoder: false tie_word_embeddings: true tokenizer_class: null top_k: 50 top_p: 1.0 torch_dtype: null torchscript: false transformers_version: 4.17.0.dev0 use_bfloat16: false use_cache: false vocab_size: 50257 decoder_start_token_id: desc: null value: 50256 deepspeed: desc: null value: None disable_tqdm: desc: null value: false diversity_penalty: desc: null value: 0.0 do_eval: desc: null value: true do_predict: desc: null value: false do_sample: desc: null value: false do_train: desc: null value: true early_stopping: desc: null value: false encoder: desc: null value: _name_or_path: facebook/wav2vec2-large-lv60 activation_dropout: 0.0 adapter_kernel_size: 3 adapter_stride: 2 add_adapter: true add_cross_attention: false apply_spec_augment: false architectures: - Wav2Vec2ForPreTraining attention_dropout: 0.0 bad_words_ids: null bos_token_id: 1 chunk_size_feed_forward: 0 classifier_proj_size: 256 codevector_dim: 768 contrastive_logits_temperature: 0.1 conv_bias: true conv_dim: - 512 - 512 - 512 - 512 - 512 - 512 - 512 conv_kernel: - 10 - 3 - 3 - 3 - 3 - 2 - 2 conv_stride: - 5 - 2 - 2 - 2 - 2 - 2 - 2 cross_attention_hidden_size: null ctc_loss_reduction: sum ctc_zero_infinity: false decoder_start_token_id: null diversity_loss_weight: 0.1 diversity_penalty: 0.0 do_sample: false do_stable_layer_norm: true early_stopping: false encoder_no_repeat_ngram_size: 0 eos_token_id: 2 feat_extract_activation: gelu feat_extract_dropout: 0.0 feat_extract_norm: layer feat_proj_dropout: 0.0 feat_quantizer_dropout: 0.0 final_dropout: 0.0 finetuning_task: null forced_bos_token_id: null forced_eos_token_id: null gradient_checkpointing: false hidden_act: gelu hidden_dropout: 0.0 hidden_dropout_prob: 0.0 hidden_size: 1024 id2label: '0': LABEL_0 '1': LABEL_1 initializer_range: 0.02 intermediate_size: 4096 is_decoder: false is_encoder_decoder: false label2id: LABEL_0: 0 LABEL_1: 1 layer_norm_eps: 1.0e-05 layerdrop: 0.0 length_penalty: 1.0 mask_feature_length: 10 mask_feature_min_masks: 0 mask_feature_prob: 0.0 mask_time_length: 10 mask_time_min_masks: 2 mask_time_prob: 0.0 max_length: 20 min_length: 0 model_type: wav2vec2 no_repeat_ngram_size: 0 num_adapter_layers: 3 num_attention_heads: 16 num_beam_groups: 1 num_beams: 1 num_codevector_groups: 2 num_codevectors_per_group: 320 num_conv_pos_embedding_groups: 16 num_conv_pos_embeddings: 128 num_feat_extract_layers: 7 num_hidden_layers: 24 num_negatives: 100 num_return_sequences: 1 output_attentions: false output_hidden_size: 1024 output_hidden_states: false output_scores: false pad_token_id: 0 prefix: null problem_type: null proj_codevector_dim: 768 pruned_heads: {} remove_invalid_values: false repetition_penalty: 1.0 return_dict: true return_dict_in_generate: false sep_token_id: null task_specific_params: null tdnn_dilation: - 1 - 2 - 3 - 1 - 1 tdnn_dim: - 512 - 512 - 512 - 512 - 1500 tdnn_kernel: - 5 - 3 - 3 - 1 - 1 temperature: 1.0 tie_encoder_decoder: false tie_word_embeddings: true tokenizer_class: null top_k: 50 top_p: 1.0 torch_dtype: null torchscript: false transformers_version: 4.17.0.dev0 use_bfloat16: false use_weighted_layer_sum: false vocab_size: 32 xvector_output_dim: 512 encoder_no_repeat_ngram_size: desc: null value: 0 eos_token_id: desc: null value: 50256 eval_accumulation_steps: desc: null value: None eval_batch_size: desc: null value: 16 eval_steps: desc: null value: 500 evaluation_strategy: desc: null value: steps finetuning_task: desc: null value: null forced_bos_token_id: desc: null value: null forced_eos_token_id: desc: null value: null fp16: desc: null value: true fp16_backend: desc: null value: auto fp16_full_eval: desc: null value: false fp16_opt_level: desc: null value: O1 generation_max_length: desc: null value: 40 generation_num_beams: desc: null value: 1 gradient_accumulation_steps: desc: null value: 4 gradient_checkpointing: desc: null value: true greater_is_better: desc: null value: None group_by_length: desc: null value: true half_precision_backend: desc: null value: amp hub_model_id: desc: null value: None hub_strategy: desc: null value: every_save hub_token: desc: null value: id2label: desc: null value: '0': LABEL_0 '1': LABEL_1 ignore_data_skip: desc: null value: false is_decoder: desc: null value: false is_encoder_decoder: desc: null value: true label2id: desc: null value: LABEL_0: 0 LABEL_1: 1 label_names: desc: null value: None label_smoothing_factor: desc: null value: 0.0 learning_rate: desc: null value: 0.0003 length_column_name: desc: null value: input_length length_penalty: desc: null value: 1.0 load_best_model_at_end: desc: null value: false local_rank: desc: null value: -1 log_level: desc: null value: -1 log_level_replica: desc: null value: -1 log_on_each_node: desc: null value: true logging_dir: desc: null value: ./runs/Mar03_00-41-32_sanchit--v100 logging_first_step: desc: null value: false logging_nan_inf_filter: desc: null value: true logging_steps: desc: null value: 1 logging_strategy: desc: null value: steps lr_scheduler_type: desc: null value: linear max_grad_norm: desc: null value: 1.0 max_length: desc: null value: 50 max_steps: desc: null value: -1 metric_for_best_model: desc: null value: None min_length: desc: null value: 0 model_type: desc: null value: speech-encoder-decoder mp_parameters: desc: null value: '' no_cuda: desc: null value: false no_repeat_ngram_size: desc: null value: 0 num_beam_groups: desc: null value: 1 num_beams: desc: null value: 1 num_return_sequences: desc: null value: 1 num_train_epochs: desc: null value: 1.0 optim: desc: null value: adamw_hf output_attentions: desc: null value: false output_dir: desc: null value: ./ output_hidden_states: desc: null value: false output_scores: desc: null value: false overwrite_output_dir: desc: null value: true pad_token_id: desc: null value: 50256 past_index: desc: null value: -1 per_device_eval_batch_size: desc: null value: 16 per_device_train_batch_size: desc: null value: 16 per_gpu_eval_batch_size: desc: null value: None per_gpu_train_batch_size: desc: null value: None predict_with_generate: desc: null value: true prediction_loss_only: desc: null value: false prefix: desc: null value: null problem_type: desc: null value: null processor_class: desc: null value: Wav2Vec2Processor pruned_heads: desc: null value: {} push_to_hub: desc: null value: true push_to_hub_model_id: desc: null value: None push_to_hub_organization: desc: null value: None push_to_hub_token: desc: null value: remove_invalid_values: desc: null value: false remove_unused_columns: desc: null value: true repetition_penalty: desc: null value: 1.0 report_to: desc: null value: '[''wandb'']' resume_from_checkpoint: desc: null value: None return_dict: desc: null value: true return_dict_in_generate: desc: null value: false run_name: desc: null value: ./ save_on_each_node: desc: null value: false save_steps: desc: null value: 500 save_strategy: desc: null value: steps save_total_limit: desc: null value: 1 seed: desc: null value: 42 sep_token_id: desc: null value: null sharded_ddp: desc: null value: '[]' skip_memory_metrics: desc: null value: true sortish_sampler: desc: null value: false task_specific_params: desc: null value: null temperature: desc: null value: 1.0 tf32: desc: null value: None tie_encoder_decoder: desc: null value: false tie_word_embeddings: desc: null value: false tokenizer_class: desc: null value: null top_k: desc: null value: 50 top_p: desc: null value: 1.0 torch_dtype: desc: null value: torch.float32 torchscript: desc: null value: false tpu_metrics_debug: desc: null value: false tpu_num_cores: desc: null value: None train_batch_size: desc: null value: 16 transformers_version: desc: null value: null use_bfloat16: desc: null value: false use_cache: desc: null value: false use_legacy_prediction_loop: desc: null value: false warmup_ratio: desc: null value: 0.0 warmup_steps: desc: null value: 500 weight_decay: desc: null value: 0.0 xpu_backend: desc: null value: None