# sanchit-gandhi's picture
# Model save
# f79bbe5
# Run configuration header. `wandb_version` is the only bare scalar; the
# other entries map a setting name to a `desc` / `value` pair. The pair must
# be indented under its setting name, otherwise `desc` and `value` become
# duplicate top-level keys.
wandb_version: 1

_n_gpu:
  desc: null
  value: 1
_name_or_path:
  desc: null
  value: ./
# Client metadata recorded alongside the run: CLI / framework / Python
# versions, start time, and two internal tables (`m`, `t`).
# NOTE(review): the integer keys inside `m` and `t` are opaque codes whose
# meaning is not visible in this file; nesting below was reconstructed from
# key order -- confirm against a freshly generated config.
_wandb:
  desc: null
  value:
    cli_version: 0.12.10
    framework: huggingface
    huggingface_version: 4.19.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    # One mapping per logged `train/*` metric name.
    m:
    - 1: train/global_step
      6:
      - 3
    - 1: train/train_runtime
      5: 1
      6:
      - 1
    - 1: train/train_samples_per_second
      5: 1
      6:
      - 1
    - 1: train/train_steps_per_second
      5: 1
      6:
      - 1
    - 1: train/total_flos
      5: 1
      6:
      - 1
    - 1: train/train_loss
      5: 1
      6:
      - 1
    - 1: train/epoch
      5: 1
      6:
      - 1
    python_version: 3.9.5
    start_time: 1651768643
    # Entries 4, 5 and 6 repeat python_version, cli_version and
    # huggingface_version from above.
    t:
      1:
      - 1
      - 2
      - 3
      - 5
      - 11
      - 12
      2:
      - 1
      - 2
      - 3
      - 5
      - 11
      - 12
      3:
      - 1
      - 7
      - 13
      4: 3.9.5
      5: 0.12.10
      6: 4.19.0.dev0
      8:
      - 5
# Settings `adafactor` .. `debug`, each recorded as a `desc` / `value` pair
# nested under the setting name.
# Note: a plain `None` is read by YAML as the string "None", not as null
# (compare the entries that use `null`); likewise '[]' is a quoted string,
# not an empty list. Values are kept exactly as recorded.
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.999
adam_epsilon:
  desc: null
  value: 1.0e-08
add_cross_attention:
  desc: null
  value: false
architectures:
  desc: null
  value:
  - SpeechEncoderDecoderModel
bad_words_ids:
  desc: null
  value: null
bf16:
  desc: null
  value: false
bf16_full_eval:
  desc: null
  value: false
bos_token_id:
  desc: null
  value: null
chunk_size_feed_forward:
  desc: null
  value: 0
cross_attention_hidden_size:
  desc: null
  value: null
data_seed:
  desc: null
  value: None
dataloader_drop_last:
  desc: null
  value: false
dataloader_num_workers:
  desc: null
  value: 0
dataloader_pin_memory:
  desc: null
  value: true
ddp_bucket_cap_mb:
  desc: null
  value: None
ddp_find_unused_parameters:
  desc: null
  value: None
debug:
  desc: null
  value: '[]'
# Nested configuration of the decoder sub-model (`model_type: bart`, loaded
# from `facebook/bart-large`). Everything under `value` belongs to the
# decoder only; several key names (e.g. `max_length`, `num_beams`) also
# exist as separate top-level settings and must not be flattened into them.
decoder:
  desc: null
  value:
    _name_or_path: facebook/bart-large
    activation_dropout: 0.1
    activation_function: gelu
    add_bias_logits: false
    add_cross_attention: true
    add_final_layer_norm: false
    architectures:
    - BartModel
    attention_dropout: 0.1
    bad_words_ids: null
    bos_token_id: 0
    chunk_size_feed_forward: 0
    classif_dropout: 0.1
    classifier_dropout: 0.0
    cross_attention_hidden_size: null
    d_model: 1024
    decoder_attention_heads: 16
    decoder_ffn_dim: 4096
    decoder_layerdrop: 0.0
    decoder_layers: 12
    decoder_start_token_id: 2
    diversity_penalty: 0.0
    do_sample: false
    dropout: 0.1
    early_stopping: true
    encoder_attention_heads: 16
    encoder_ffn_dim: 4096
    encoder_layerdrop: 0.0
    encoder_layers: 12
    encoder_no_repeat_ngram_size: 0
    eos_token_id: 2
    exponential_decay_length_penalty: null
    finetuning_task: null
    forced_bos_token_id: 0
    forced_eos_token_id: 2
    gradient_checkpointing: false
    # Keys are quoted so they stay strings rather than integers.
    id2label:
      '0': LABEL_0
      '1': LABEL_1
      '2': LABEL_2
    init_std: 0.02
    is_decoder: true
    is_encoder_decoder: false
    label2id:
      LABEL_0: 0
      LABEL_1: 1
      LABEL_2: 2
    length_penalty: 1.0
    max_length: 20
    max_position_embeddings: 1024
    min_length: 0
    model_type: bart
    no_repeat_ngram_size: 3
    normalize_before: false
    num_beam_groups: 1
    num_beams: 4
    num_hidden_layers: 12
    num_return_sequences: 1
    output_attentions: false
    output_hidden_states: false
    output_scores: false
    pad_token_id: 1
    prefix: null
    problem_type: null
    pruned_heads: {}
    remove_invalid_values: false
    repetition_penalty: 1.0
    return_dict: true
    return_dict_in_generate: false
    scale_embedding: false
    sep_token_id: null
    # Per-task generation overrides; each task carries the same four keys.
    task_specific_params:
      summarization:
        length_penalty: 1.0
        max_length: 128
        min_length: 12
        num_beams: 4
      summarization_cnn:
        length_penalty: 2.0
        max_length: 142
        min_length: 56
        num_beams: 4
      summarization_xsum:
        length_penalty: 1.0
        max_length: 62
        min_length: 11
        num_beams: 6
    temperature: 1.0
    tie_encoder_decoder: false
    tie_word_embeddings: true
    tokenizer_class: null
    top_k: 50
    top_p: 1.0
    torch_dtype: null
    torchscript: false
    transformers_version: 4.19.0.dev0
    typical_p: 1.0
    use_bfloat16: false
    use_cache: true
    vocab_size: 50265
# Settings `decoder_start_token_id` .. `early_stopping`, each recorded as a
# `desc` / `value` pair nested under the setting name.
# Note: a plain `None` is read by YAML as the string "None", not as null.
decoder_start_token_id:
  desc: null
  value: 0
deepspeed:
  desc: null
  value: None
disable_tqdm:
  desc: null
  value: false
diversity_penalty:
  desc: null
  value: 0.0
do_eval:
  desc: null
  value: true
do_predict:
  desc: null
  value: false
do_sample:
  desc: null
  value: false
do_train:
  desc: null
  value: true
early_stopping:
  desc: null
  value: false
# Nested configuration of the encoder sub-model (`model_type: wav2vec2`,
# loaded from `facebook/wav2vec2-xls-r-300m`). Everything under `value`
# belongs to the encoder only; several key names (e.g. `hidden_dropout`,
# `pad_token_id`) also exist as separate top-level settings.
encoder:
  desc: null
  value:
    _name_or_path: facebook/wav2vec2-xls-r-300m
    activation_dropout: 0.0
    adapter_kernel_size: 3
    adapter_stride: 2
    add_adapter: true
    add_cross_attention: false
    apply_spec_augment: true
    architectures:
    - Wav2Vec2ForPreTraining
    attention_dropout: 0.1
    bad_words_ids: null
    bos_token_id: 1
    chunk_size_feed_forward: 0
    classifier_proj_size: 256
    codevector_dim: 768
    contrastive_logits_temperature: 0.1
    conv_bias: true
    # conv_dim / conv_kernel / conv_stride each list seven entries, matching
    # `num_feat_extract_layers: 7` below.
    conv_dim:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    conv_kernel:
    - 10
    - 3
    - 3
    - 3
    - 3
    - 2
    - 2
    conv_stride:
    - 5
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    cross_attention_hidden_size: null
    ctc_loss_reduction: sum
    ctc_zero_infinity: false
    decoder_start_token_id: null
    diversity_loss_weight: 0.1
    diversity_penalty: 0.0
    do_sample: false
    do_stable_layer_norm: true
    early_stopping: false
    encoder_no_repeat_ngram_size: 0
    eos_token_id: 2
    exponential_decay_length_penalty: null
    feat_extract_activation: gelu
    feat_extract_dropout: 0.0
    feat_extract_norm: layer
    feat_proj_dropout: 0.0
    feat_quantizer_dropout: 0.0
    final_dropout: 0.0
    finetuning_task: null
    forced_bos_token_id: null
    forced_eos_token_id: null
    gradient_checkpointing: false
    hidden_act: gelu
    hidden_dropout: 0.18004101365999406
    hidden_size: 1024
    # Keys are quoted so they stay strings rather than integers.
    id2label:
      '0': LABEL_0
      '1': LABEL_1
    initializer_range: 0.02
    intermediate_size: 4096
    is_decoder: false
    is_encoder_decoder: false
    label2id:
      LABEL_0: 0
      LABEL_1: 1
    layer_norm_eps: 1.0e-05
    layerdrop: 0.0
    length_penalty: 1.0
    mask_feature_length: 10
    mask_feature_min_masks: 0
    mask_feature_prob: 0.0
    mask_time_length: 10
    mask_time_min_masks: 2
    mask_time_prob: 0.1
    max_length: 20
    min_length: 0
    model_type: wav2vec2
    no_repeat_ngram_size: 0
    num_adapter_layers: 3
    num_attention_heads: 16
    num_beam_groups: 1
    num_beams: 1
    num_codevector_groups: 2
    num_codevectors_per_group: 320
    num_conv_pos_embedding_groups: 16
    num_conv_pos_embeddings: 128
    num_feat_extract_layers: 7
    num_hidden_layers: 24
    num_negatives: 100
    num_return_sequences: 1
    output_attentions: false
    output_hidden_size: 1024
    output_hidden_states: false
    output_scores: false
    pad_token_id: 0
    prefix: null
    problem_type: null
    proj_codevector_dim: 768
    pruned_heads: {}
    remove_invalid_values: false
    repetition_penalty: 1.0
    return_dict: true
    return_dict_in_generate: false
    sep_token_id: null
    task_specific_params: null
    # tdnn_dilation / tdnn_dim / tdnn_kernel each list five entries.
    tdnn_dilation:
    - 1
    - 2
    - 3
    - 1
    - 1
    tdnn_dim:
    - 512
    - 512
    - 512
    - 512
    - 1500
    tdnn_kernel:
    - 5
    - 3
    - 3
    - 1
    - 1
    temperature: 1.0
    tie_encoder_decoder: false
    tie_word_embeddings: true
    tokenizer_class: null
    top_k: 50
    top_p: 1.0
    torch_dtype: float32
    torchscript: false
    transformers_version: 4.19.0.dev0
    typical_p: 1.0
    use_bfloat16: false
    use_weighted_layer_sum: false
    vocab_size: 32
    xvector_output_dim: 512
# Settings `encoder_no_repeat_ngram_size` .. `label_smoothing_factor`, each
# recorded as a `desc` / `value` pair nested under the setting name.
# Note: a plain `None` is read by YAML as the string "None", not as null
# (compare the entries that use `null`). Values are kept exactly as recorded.
encoder_no_repeat_ngram_size:
  desc: null
  value: 0
eos_token_id:
  desc: null
  value: 2
eval_accumulation_steps:
  desc: null
  value: None
eval_batch_size:
  desc: null
  value: 8
eval_delay:
  desc: null
  value: 0
eval_split_name:
  desc: null
  value: test
eval_steps:
  desc: null
  value: 500
evaluation_strategy:
  desc: null
  value: steps
exponential_decay_length_penalty:
  desc: null
  value: null
finetuning_task:
  desc: null
  value: null
forced_bos_token_id:
  desc: null
  value: null
forced_eos_token_id:
  desc: null
  value: null
fp16:
  desc: null
  value: true
fp16_backend:
  desc: null
  value: auto
fp16_full_eval:
  desc: null
  value: false
fp16_opt_level:
  desc: null
  value: O1
generation_max_length:
  desc: null
  value: 40
generation_num_beams:
  desc: null
  value: 1
gradient_accumulation_steps:
  desc: null
  value: 8
gradient_checkpointing:
  desc: null
  value: true
greater_is_better:
  desc: null
  value: true
group_by_length:
  desc: null
  value: true
half_precision_backend:
  desc: null
  value: amp
hidden_dropout:
  desc: null
  value: 0.18004101365999406
hub_model_id:
  desc: null
  value: None
hub_private_repo:
  desc: null
  value: false
hub_strategy:
  desc: null
  value: every_save
# NOTE(review): `<HUB_TOKEN>` looks like a redaction placeholder rather than
# a real credential -- keep it that way; never commit the actual token.
hub_token:
  desc: null
  value: <HUB_TOKEN>
# Keys are quoted so they stay strings rather than integers.
id2label:
  desc: null
  value:
    '0': LABEL_0
    '1': LABEL_1
ignore_data_skip:
  desc: null
  value: false
include_inputs_for_metrics:
  desc: null
  value: false
is_decoder:
  desc: null
  value: false
is_encoder_decoder:
  desc: null
  value: true
label2id:
  desc: null
  value:
    LABEL_0: 0
    LABEL_1: 1
label_names:
  desc: null
  value: None
label_smoothing_factor:
  desc: null
  value: 0.0
# Settings `language` .. `optim`, each recorded as a `desc` / `value` pair
# nested under the setting name. Values are kept exactly as recorded.
language:
  desc: null
  value: fr.en
learning_rate:
  desc: null
  value: 0.0002757119755681108
length_column_name:
  desc: null
  value: length
length_penalty:
  desc: null
  value: 1.0
load_best_model_at_end:
  desc: null
  value: true
local_rank:
  desc: null
  value: -1
log_level:
  desc: null
  value: -1
log_level_replica:
  desc: null
  value: -1
log_on_each_node:
  desc: null
  value: true
logging_dir:
  desc: null
  value: ./runs/May05_16-32-27_sanchit--v100
logging_first_step:
  desc: null
  value: false
logging_nan_inf_filter:
  desc: null
  value: true
logging_steps:
  desc: null
  value: 1
logging_strategy:
  desc: null
  value: steps
lr_scheduler_type:
  desc: null
  value: linear
max_duration_in_seconds:
  desc: null
  value: 20
max_grad_norm:
  desc: null
  value: 1.0
max_length:
  desc: null
  value: 40
max_steps:
  desc: null
  value: -1
metric_for_best_model:
  desc: null
  value: bleu
min_length:
  desc: null
  value: 0
model_name_or_path:
  desc: null
  value: ./
model_type:
  desc: null
  value: speech-encoder-decoder
# Recorded as an explicit empty string, not null.
mp_parameters:
  desc: null
  value: ''
no_cuda:
  desc: null
  value: false
no_repeat_ngram_size:
  desc: null
  value: 0
num_beam_groups:
  desc: null
  value: 1
num_beams:
  desc: null
  value: 1
num_return_sequences:
  desc: null
  value: 1
num_train_epochs:
  desc: null
  value: 3
optim:
  desc: null
  value: adamw_hf
# Settings `output_attentions` .. `return_dict_in_generate`, each recorded
# as a `desc` / `value` pair nested under the setting name.
# Note: a plain `None` is read by YAML as the string "None", not as null
# (compare the entries that use `null`). Values are kept exactly as recorded.
output_attentions:
  desc: null
  value: false
output_dir:
  desc: null
  value: ./
output_hidden_states:
  desc: null
  value: false
output_scores:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_token_id:
  desc: null
  value: 1
past_index:
  desc: null
  value: -1
per_device_eval_batch_size:
  desc: null
  value: 8
per_device_train_batch_size:
  desc: null
  value: 8
per_gpu_eval_batch_size:
  desc: null
  value: None
per_gpu_train_batch_size:
  desc: null
  value: None
predict_with_generate:
  desc: null
  value: true
prediction_loss_only:
  desc: null
  value: false
prefix:
  desc: null
  value: null
problem_type:
  desc: null
  value: null
processor_class:
  desc: null
  value: Wav2Vec2Processor
pruned_heads:
  desc: null
  value: {}
push_to_hub:
  desc: null
  value: true
push_to_hub_model_id:
  desc: null
  value: None
push_to_hub_organization:
  desc: null
  value: None
# NOTE(review): `<PUSH_TO_HUB_TOKEN>` looks like a redaction placeholder
# rather than a real credential -- never commit the actual token.
push_to_hub_token:
  desc: null
  value: <PUSH_TO_HUB_TOKEN>
remove_invalid_values:
  desc: null
  value: false
remove_unused_columns:
  desc: null
  value: true
repetition_penalty:
  desc: null
  value: 1.0
# The value is a single quoted string holding a list literal (inner quotes
# are escaped as ''), not a YAML sequence.
report_to:
  desc: null
  value: '[''tensorboard'', ''wandb'', ''codecarbon'']'
resume_from_checkpoint:
  desc: null
  value: None
return_dict:
  desc: null
  value: true
return_dict_in_generate:
  desc: null
  value: false
# Settings `run_name` .. `xpu_backend`, each recorded as a `desc` / `value`
# pair nested under the setting name.
# Note: a plain `None` is read by YAML as the string "None", not as null
# (compare the entries that use `null`); likewise '[]' is a quoted string,
# not an empty list. Values are kept exactly as recorded.
run_name:
  desc: null
  value: ./
save_on_each_node:
  desc: null
  value: false
save_steps:
  desc: null
  value: 500
save_strategy:
  desc: null
  value: steps
save_total_limit:
  desc: null
  value: None
seed:
  desc: null
  value: 42
sep_token_id:
  desc: null
  value: null
sharded_ddp:
  desc: null
  value: '[]'
skip_memory_metrics:
  desc: null
  value: true
sortish_sampler:
  desc: null
  value: false
task:
  desc: null
  value: covost2
task_specific_params:
  desc: null
  value: null
temperature:
  desc: null
  value: 1.0
tf32:
  desc: null
  value: None
tie_encoder_decoder:
  desc: null
  value: false
tie_word_embeddings:
  desc: null
  value: false
tokenizer_class:
  desc: null
  value: null
top_k:
  desc: null
  value: 50
top_p:
  desc: null
  value: 1.0
torch_dtype:
  desc: null
  value: torch.float32
torchscript:
  desc: null
  value: false
tpu_metrics_debug:
  desc: null
  value: false
tpu_num_cores:
  desc: null
  value: None
train_batch_size:
  desc: null
  value: 8
transformers_version:
  desc: null
  value: null
typical_p:
  desc: null
  value: 1.0
use_bfloat16:
  desc: null
  value: false
use_cache:
  desc: null
  value: false
use_legacy_prediction_loop:
  desc: null
  value: false
warmup_ratio:
  desc: null
  value: 0.0
warmup_steps:
  desc: null
  value: 500
weight_decay:
  desc: null
  value: 0.0
xpu_backend:
  desc: null
  value: None