# Training in progress, step 500 (commit a5afdd5)
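# Weights & Biases run configuration (config.yaml) logged during a Hugging Face
# Trainer run that fine-tunes a SpeechEncoderDecoderModel pairing a
# facebook/wav2vec2-large-lv60 speech encoder with a gpt2-medium decoder.
# Each top-level key is stored as a desc/value pair; the keys interleave the
# model configuration (including the nested encoder/decoder sub-configs) with
# the Seq2SeqTrainingArguments for the run. Values logged as the literal string
# None correspond to arguments left unset (Python None).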
wandb_version: 1

_n_gpu:
  desc: null
  value: 1
_name_or_path:
  desc: null
  value: ./
_wandb:
  desc: null
  value:
    cli_version: 0.12.10
    framework: huggingface
    huggingface_version: 4.17.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    m:
    - 1: train/global_step
      6:
      - 3
    python_version: 3.9.5
    start_time: 1646098184
    t:
      1:
      - 1
      - 5
      - 11
      2:
      - 1
      - 5
      - 11
      3:
      - 1
      - 7
      - 13
      4: 3.9.5
      5: 0.12.10
      6: 4.17.0.dev0
      8:
      - 5
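# The _wandb block above is client-side telemetry: cli_version and
# python_version describe the logging environment, start_time is a Unix
# timestamp (1646098184, i.e. 2022-03-01 ~01:29 UTC, matching the Mar01_01-29
# logging_dir further down), and the numeric m:/t: entries are wandb-internal
# metric and feature codes.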
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.999
adam_epsilon:
  desc: null
  value: 1.0e-08
add_cross_attention:
  desc: null
  value: false
architectures:
  desc: null
  value:
  - SpeechEncoderDecoderModel
bad_words_ids:
  desc: null
  value: null
bf16:
  desc: null
  value: false
bf16_full_eval:
  desc: null
  value: false
bos_token_id:
  desc: null
  value: null
chunk_size_feed_forward:
  desc: null
  value: 0
cross_attention_hidden_size:
  desc: null
  value: null
dataloader_drop_last:
  desc: null
  value: false
dataloader_num_workers:
  desc: null
  value: 0
dataloader_pin_memory:
  desc: null
  value: true
ddp_bucket_cap_mb:
  desc: null
  value: None
ddp_find_unused_parameters:
  desc: null
  value: None
debug:
  desc: null
  value: '[]'
decoder:
  desc: null
  value:
    _name_or_path: gpt2-medium
    activation_function: gelu_new
    add_cross_attention: true
    architectures:
    - GPT2LMHeadModel
    attn_pdrop: 0.0
    bad_words_ids: null
    bos_token_id: 50256
    chunk_size_feed_forward: 0
    cross_attention_hidden_size: null
    decoder_start_token_id: null
    diversity_penalty: 0.0
    do_sample: false
    early_stopping: false
    embd_pdrop: 0.0
    encoder_no_repeat_ngram_size: 0
    eos_token_id: 50256
    finetuning_task: null
    forced_bos_token_id: null
    forced_eos_token_id: null
    id2label:
      '0': LABEL_0
      '1': LABEL_1
    initializer_range: 0.02
    is_decoder: true
    is_encoder_decoder: false
    label2id:
      LABEL_0: 0
      LABEL_1: 1
    layer_norm_epsilon: 1.0e-05
    length_penalty: 1.0
    max_length: 20
    min_length: 0
    model_type: gpt2
    n_ctx: 1024
    n_embd: 1024
    n_head: 16
    n_inner: null
    n_layer: 24
    n_positions: 1024
    n_special: 0
    no_repeat_ngram_size: 0
    num_beam_groups: 1
    num_beams: 1
    num_return_sequences: 1
    output_attentions: false
    output_hidden_states: false
    output_scores: false
    pad_token_id: null
    predict_special_tokens: true
    prefix: null
    problem_type: null
    pruned_heads: {}
    remove_invalid_values: false
    reorder_and_upcast_attn: false
    repetition_penalty: 1.0
    resid_pdrop: 0.0
    return_dict: true
    return_dict_in_generate: false
    scale_attn_by_inverse_layer_idx: false
    scale_attn_weights: true
    sep_token_id: null
    summary_activation: null
    summary_first_dropout: 0.0
    summary_proj_to_labels: true
    summary_type: cls_index
    summary_use_proj: true
    task_specific_params:
      text-generation:
        do_sample: true
        max_length: 50
        temperature: 1.0
    tie_encoder_decoder: false
    tie_word_embeddings: true
    tokenizer_class: null
    top_k: 50
    top_p: 1.0
    torch_dtype: null
    torchscript: false
    transformers_version: 4.17.0.dev0
    use_bfloat16: false
    use_cache: false
    vocab_size: 50257
decoder_start_token_id:
  desc: null
  value: 50256
deepspeed:
  desc: null
  value: None
disable_tqdm:
  desc: null
  value: false
diversity_penalty:
  desc: null
  value: 0.0
do_eval:
  desc: null
  value: true
do_predict:
  desc: null
  value: false
do_sample:
  desc: null
  value: false
do_train:
  desc: null
  value: true
early_stopping:
  desc: null
  value: false
encoder:
  desc: null
  value:
    _name_or_path: facebook/wav2vec2-large-lv60
    activation_dropout: 0.0
    adapter_kernel_size: 3
    adapter_stride: 2
    add_adapter: true
    add_cross_attention: false
    apply_spec_augment: false
    architectures:
    - Wav2Vec2ForPreTraining
    attention_dropout: 0.0
    bad_words_ids: null
    bos_token_id: 1
    chunk_size_feed_forward: 0
    classifier_proj_size: 256
    codevector_dim: 768
    contrastive_logits_temperature: 0.1
    conv_bias: true
    conv_dim:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    conv_kernel:
    - 10
    - 3
    - 3
    - 3
    - 3
    - 2
    - 2
    conv_stride:
    - 5
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    cross_attention_hidden_size: null
    ctc_loss_reduction: sum
    ctc_zero_infinity: false
    decoder_start_token_id: null
    diversity_loss_weight: 0.1
    diversity_penalty: 0.0
    do_sample: false
    do_stable_layer_norm: true
    early_stopping: false
    encoder_no_repeat_ngram_size: 0
    eos_token_id: 2
    feat_extract_activation: gelu
    feat_extract_dropout: 0.0
    feat_extract_norm: layer
    feat_proj_dropout: 0.0
    feat_quantizer_dropout: 0.0
    final_dropout: 0.0
    finetuning_task: null
    forced_bos_token_id: null
    forced_eos_token_id: null
    gradient_checkpointing: false
    hidden_act: gelu
    hidden_dropout: 0.0
    hidden_dropout_prob: 0.0
    hidden_size: 1024
    id2label:
      '0': LABEL_0
      '1': LABEL_1
    initializer_range: 0.02
    intermediate_size: 4096
    is_decoder: false
    is_encoder_decoder: false
    label2id:
      LABEL_0: 0
      LABEL_1: 1
    layer_norm_eps: 1.0e-05
    layerdrop: 0.0
    length_penalty: 1.0
    mask_feature_length: 10
    mask_feature_min_masks: 0
    mask_feature_prob: 0.0
    mask_time_length: 10
    mask_time_min_masks: 2
    mask_time_prob: 0.0
    max_length: 20
    min_length: 0
    model_type: wav2vec2
    no_repeat_ngram_size: 0
    num_adapter_layers: 3
    num_attention_heads: 16
    num_beam_groups: 1
    num_beams: 1
    num_codevector_groups: 2
    num_codevectors_per_group: 320
    num_conv_pos_embedding_groups: 16
    num_conv_pos_embeddings: 128
    num_feat_extract_layers: 7
    num_hidden_layers: 24
    num_negatives: 100
    num_return_sequences: 1
    output_attentions: false
    output_hidden_size: 1024
    output_hidden_states: false
    output_scores: false
    pad_token_id: 0
    prefix: null
    problem_type: null
    proj_codevector_dim: 768
    pruned_heads: {}
    remove_invalid_values: false
    repetition_penalty: 1.0
    return_dict: true
    return_dict_in_generate: false
    sep_token_id: null
    task_specific_params: null
    tdnn_dilation:
    - 1
    - 2
    - 3
    - 1
    - 1
    tdnn_dim:
    - 512
    - 512
    - 512
    - 512
    - 1500
    tdnn_kernel:
    - 5
    - 3
    - 3
    - 1
    - 1
    temperature: 1.0
    tie_encoder_decoder: false
    tie_word_embeddings: true
    tokenizer_class: null
    top_k: 50
    top_p: 1.0
    torch_dtype: null
    torchscript: false
    transformers_version: 4.17.0.dev0
    use_bfloat16: false
    use_weighted_layer_sum: false
    vocab_size: 32
    xvector_output_dim: 512
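# The encoder block above is the serialized Wav2Vec2Config for
# facebook/wav2vec2-large-lv60. add_adapter: true inserts num_adapter_layers: 3
# strided (stride 2) convolutional adapter layers that downsample the 1024-dim
# encoder states before the decoder cross-attends to them; SpecAugment masking
# is disabled (apply_spec_augment: false) and all dropouts are 0.0.
# A minimal sketch of how such an encoder-decoder pair is typically assembled
# with transformers (illustrative only; this repository's actual training
# script is not part of this file, and the keyword routing below is assumed):
#
#   from transformers import SpeechEncoderDecoderModel
#
#   model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
#       "facebook/wav2vec2-large-lv60",   # speech encoder
#       "gpt2-medium",                    # text decoder (cross-attention added)
#       encoder_add_adapter=True,         # assumed: forwarded to Wav2Vec2Config
#   )
#   # generation/bookkeeping tokens taken from the values logged in this file
#   model.config.decoder_start_token_id = 50256
#   model.config.eos_token_id = 50256
#   model.config.pad_token_id = 50256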
encoder_no_repeat_ngram_size:
  desc: null
  value: 0
eos_token_id:
  desc: null
  value: 50256
eval_accumulation_steps:
  desc: null
  value: None
eval_batch_size:
  desc: null
  value: 16
eval_steps:
  desc: null
  value: 500
evaluation_strategy:
  desc: null
  value: steps
finetuning_task:
  desc: null
  value: null
forced_bos_token_id:
  desc: null
  value: null
forced_eos_token_id:
  desc: null
  value: null
fp16:
  desc: null
  value: true
fp16_backend:
  desc: null
  value: auto
fp16_full_eval:
  desc: null
  value: false
fp16_opt_level:
  desc: null
  value: O1
generation_max_length:
  desc: null
  value: 40
generation_num_beams:
  desc: null
  value: 1
gradient_accumulation_steps:
  desc: null
  value: 8
gradient_checkpointing:
  desc: null
  value: true
greater_is_better:
  desc: null
  value: None
group_by_length:
  desc: null
  value: true
half_precision_backend:
  desc: null
  value: amp
hub_model_id:
  desc: null
  value: None
hub_strategy:
  desc: null
  value: every_save
hub_token:
  desc: null
  value: <HUB_TOKEN>
id2label:
  desc: null
  value:
    '0': LABEL_0
    '1': LABEL_1
ignore_data_skip:
  desc: null
  value: false
is_decoder:
  desc: null
  value: false
is_encoder_decoder:
  desc: null
  value: true
label2id:
  desc: null
  value:
    LABEL_0: 0
    LABEL_1: 1
label_names:
  desc: null
  value: None
label_smoothing_factor:
  desc: null
  value: 0.0
learning_rate:
  desc: null
  value: 1.0e-05
length_column_name:
  desc: null
  value: input_length
length_penalty:
  desc: null
  value: 1.0
load_best_model_at_end:
  desc: null
  value: false
local_rank:
  desc: null
  value: -1
log_level:
  desc: null
  value: -1
log_level_replica:
  desc: null
  value: -1
log_on_each_node:
  desc: null
  value: true
logging_dir:
  desc: null
  value: ./runs/Mar01_01-29-04_sanchit--v100
logging_first_step:
  desc: null
  value: false
logging_nan_inf_filter:
  desc: null
  value: true
logging_steps:
  desc: null
  value: 1
logging_strategy:
  desc: null
  value: steps
lr_scheduler_type:
  desc: null
  value: linear
max_grad_norm:
  desc: null
  value: 1.0
max_length:
  desc: null
  value: 50
max_steps:
  desc: null
  value: -1
metric_for_best_model:
  desc: null
  value: None
min_length:
  desc: null
  value: 0
model_type:
  desc: null
  value: speech-encoder-decoder
mp_parameters:
  desc: null
  value: ''
no_cuda:
  desc: null
  value: false
no_repeat_ngram_size:
  desc: null
  value: 0
num_beam_groups:
  desc: null
  value: 1
num_beams:
  desc: null
  value: 1
num_return_sequences:
  desc: null
  value: 1
num_train_epochs:
  desc: null
  value: 1.0
optim:
  desc: null
  value: adamw_hf
output_attentions:
  desc: null
  value: false
output_dir:
  desc: null
  value: ./
output_hidden_states:
  desc: null
  value: false
output_scores:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_token_id:
  desc: null
  value: 50256
past_index:
  desc: null
  value: -1
per_device_eval_batch_size:
  desc: null
  value: 16
per_device_train_batch_size:
  desc: null
  value: 16
per_gpu_eval_batch_size:
  desc: null
  value: None
per_gpu_train_batch_size:
  desc: null
  value: None
predict_with_generate:
  desc: null
  value: true
prediction_loss_only:
  desc: null
  value: false
prefix:
  desc: null
  value: null
problem_type:
  desc: null
  value: null
processor_class:
  desc: null
  value: Wav2Vec2Processor
pruned_heads:
  desc: null
  value: {}
push_to_hub:
  desc: null
  value: true
push_to_hub_model_id:
  desc: null
  value: None
push_to_hub_organization:
  desc: null
  value: None
push_to_hub_token:
  desc: null
  value: <PUSH_TO_HUB_TOKEN>
remove_invalid_values:
  desc: null
  value: false
remove_unused_columns:
  desc: null
  value: true
repetition_penalty:
  desc: null
  value: 1.0
report_to:
  desc: null
  value: '[''wandb'']'
resume_from_checkpoint:
  desc: null
  value: None
return_dict:
  desc: null
  value: true
return_dict_in_generate:
  desc: null
  value: false
run_name:
  desc: null
  value: ./
save_on_each_node:
  desc: null
  value: false
save_steps:
  desc: null
  value: 500
save_strategy:
  desc: null
  value: steps
save_total_limit:
  desc: null
  value: 1
seed:
  desc: null
  value: 42
sep_token_id:
  desc: null
  value: null
sharded_ddp:
  desc: null
  value: '[]'
skip_memory_metrics:
  desc: null
  value: true
sortish_sampler:
  desc: null
  value: false
task_specific_params:
  desc: null
  value: null
temperature:
  desc: null
  value: 1.0
tf32:
  desc: null
  value: None
tie_encoder_decoder:
  desc: null
  value: false
tie_word_embeddings:
  desc: null
  value: false
tokenizer_class:
  desc: null
  value: null
top_k:
  desc: null
  value: 50
top_p:
  desc: null
  value: 1.0
torch_dtype:
  desc: null
  value: torch.float32
torchscript:
  desc: null
  value: false
tpu_metrics_debug:
  desc: null
  value: false
tpu_num_cores:
  desc: null
  value: None
train_batch_size:
  desc: null
  value: 16
transformers_version:
  desc: null
  value: null
use_bfloat16:
  desc: null
  value: false
use_cache:
  desc: null
  value: false
use_legacy_prediction_loop:
  desc: null
  value: false
warmup_ratio:
  desc: null
  value: 0.0
warmup_steps:
  desc: null
  value: 500
weight_decay:
  desc: null
  value: 0.0
xpu_backend:
  desc: null
  value: None
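# Training-argument summary from the values above: effective batch size is
# per_device_train_batch_size 16 x gradient_accumulation_steps 8 x 1 GPU = 128
# examples per optimizer step; AdamW (adamw_hf) at learning_rate 1.0e-05 with a
# linear schedule and 500 warmup steps; fp16 AMP with gradient checkpointing;
# length-grouped batching on the input_length column; evaluation with
# generation (max length 40, greedy) and checkpointing every 500 steps, keeping
# one checkpoint and pushing to the Hub on every save.
# A minimal sketch of equivalent Seq2SeqTrainingArguments (a reconstruction
# from the logged values, not the exact invocation used for this run):
#
#   from transformers import Seq2SeqTrainingArguments
#
#   training_args = Seq2SeqTrainingArguments(
#       output_dir="./",
#       per_device_train_batch_size=16,
#       per_device_eval_batch_size=16,
#       gradient_accumulation_steps=8,
#       learning_rate=1e-5,
#       warmup_steps=500,
#       num_train_epochs=1,
#       evaluation_strategy="steps",
#       eval_steps=500,
#       logging_steps=1,
#       save_steps=500,
#       save_total_limit=1,
#       fp16=True,
#       gradient_checkpointing=True,
#       group_by_length=True,
#       length_column_name="input_length",
#       predict_with_generate=True,
#       generation_max_length=40,
#       generation_num_beams=1,
#       push_to_hub=True,
#       report_to=["wandb"],
#   )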