# AmitMY's picture
# Upload folder using huggingface_hub
# 3a45c3b verified
# Run flags plus batching, checkpointing and precision settings of a
# training-argument dump (keys are in alphabetical order).
# NOTE(review): the key set looks like a Sockeye `args.yaml` — confirm which
# tool/fork consumes this file before editing it by hand.
allow_missing_params: false
amp: false
apex_amp: false
batch_sentences_multiple_of: 8
# NOTE(review): with `batch_type: word` this is presumably a per-batch word
# budget. 1025 is not a multiple of 8 — confirm it is not a typo for 1024.
batch_size: 1025
batch_type: word
bow_task_pos_weight: 10
bow_task_weight: 1.0
bucket_scaling: false
bucket_width: 8
# NOTE(review): the semantics of the three `cache_*` keys are not evident
# from this file — verify against the consuming tool.
cache_last_best_params: 0
cache_metric: perplexity
cache_strategy: best
checkpoint_improvement_threshold: 0.0
# `checkpoint_interval` and `decode_and_evaluate` are both 1000; presumably
# the interval is counted in updates — TODO confirm the unit.
checkpoint_interval: 1000
clamp_to_dtype: false
config: null
decode_and_evaluate: 1000
decoder: transformer
# All mixed-precision switches in this file (`amp`, `apex_amp`,
# `deepspeed_bf16`, `deepspeed_fp16`) are false and `dtype` is float32.
deepspeed_bf16: false
deepspeed_fp16: false
device_id: 0
dist: false
dry_run: false
dtype: float32
# Two-entry list; presumably one value per side (source/encoder first,
# target/decoder second) — TODO confirm the order.
embed_dropout:
- 0.5
- 0.5
# Encoder type, parameter-freezing options, learning-rate schedule and
# stopping limits.
encoder: transformer
end_of_prepending_tag: null
env: null
# No parameters are listed as fixed and no fixing strategy is set.
fixed_param_names: []
fixed_param_strategy: null
# NOTE(review): `gradient_clipping_type` is `none`, so the threshold of 1.0
# is presumably unused — confirm.
gradient_clipping_threshold: 1.0
gradient_clipping_type: none
ignore_extra_params: false
initial_learning_rate: 0.0002
keep_initializations: false
# NOTE(review): -1 presumably means "keep all parameter files" — confirm.
keep_last_params: -1
label_smoothing: 0.2
label_smoothing_impl: torch
# Plateau-based schedule: presumably the learning rate is multiplied by 0.7
# after 8 checkpoints without improvement, following 1000 warmup updates —
# TODO confirm units against the consuming tool.
learning_rate_reduce_factor: 0.7
learning_rate_reduce_num_not_improved: 8
learning_rate_scheduler_type: plateau-reduce
learning_rate_warmup: 1000
length_task: null
length_task_layers: 1
length_task_weight: 1.0
lhuc: null
local_rank: null
loglevel: INFO
loglevel_secondary_workers: INFO
# Of the `max_*` stopping limits in this range only
# `max_num_checkpoint_not_improved` is set; the others are null.
max_checkpoints: null
max_num_checkpoint_not_improved: 100
max_num_epochs: null
max_samples: null
max_seconds: null
# Sequence-length limits, minimum/maximum training duration, model size
# lists, the optimized metric and optimizer hyper-parameters.
# Two-entry lists below presumably hold one value per side (source first,
# target second) — TODO confirm the order.
max_seq_len:
- 512
- 512
max_updates: null
# The only non-null `min_*` duration constraint: at least 10 epochs.
min_num_epochs: 10
min_samples: null
min_updates: null
momentum: 0.0
neural_vocab_selection: null
neural_vocab_selection_block_loss: false
# NOTE(review): bucketing is disabled here while `bucket_width: 8` is still
# set elsewhere in this file; the width is presumably ignored — confirm.
no_bucketing: true
no_logfile: false
no_reload_on_learning_rate_reduce: false
# Both entries are null; presumably the embedding size is then derived from
# `transformer_model_size` — TODO confirm.
num_embed:
- null
- null
num_layers:
- 6
- 6
# NOTE(review): 0 presumably means "no vocabulary size cap" — confirm.
num_words:
- 0
- 0
# NOTE(review): `signwriting-similarity` looks like a project-specific
# metric; verify the consuming tool/fork actually provides it.
optimized_metric: signwriting-similarity
optimizer: adam
optimizer_betas:
- 0.9
- 0.999
optimizer_eps: 1.0e-08
# Output location, initial parameters, training-data location and
# source-side factor settings.
# NOTE(review): `output`, `params` and `prepared_data` are absolute,
# machine-specific paths; they will not resolve on another host.
output: /shares/iict-sp2.ebling.cl.uzh/amoryo/checkpoints/signwriting-translation/spoken-to-signed/target-factors-gpt-tuned/model
overwrite_output: false
pad_vocab_to_multiple_of: 8
# `params` points into `target-factors-gpt/model` while `output` is
# `target-factors-gpt-tuned/model`; presumably this run is initialized from
# the earlier run's best parameters (fine-tuning) — confirm.
params: /shares/iict-sp2.ebling.cl.uzh/amoryo/checkpoints/signwriting-translation/spoken-to-signed/target-factors-gpt/model/params.best
prepared_data: /shares/iict-sp2.ebling.cl.uzh/amoryo/checkpoints/signwriting-translation/spoken-to-signed/target-factors-gpt-tuned/train_data
quiet: false
quiet_secondary_workers: false
seed: 1
shared_vocab: false
# `source` and `target` are null; training data is given only through
# `prepared_data` above.
source: null
# Every `source_factor*` list is empty: no source factors are configured.
source_factor_vocabs: []
source_factors: []
source_factors_combine: []
source_factors_num_embed: []
source_factors_share_embedding: []
source_factors_use_source_vocab: []
source_vocab: null
stop_training_on_decoder_failure: false
target: null
# Empty here; presumably the target factor data comes from `prepared_data`
# — TODO confirm.
target_factor_vocabs: []
target_factors: []
# Target-factor combination settings. The four-entry lists match the four
# files listed under `validation_target_factors` in this file.
target_factors_combine:
- sum
- sum
- sum
- sum
target_factors_num_embed: []
target_factors_share_embedding:
- false
- false
- false
- false
target_factors_use_target_vocab: []
# NOTE(review): a single weight is given for four factors; presumably it is
# applied to every factor loss — confirm.
target_factors_weight:
- 0.2
target_vocab: null
# TF32 is enabled while `dtype` elsewhere in this file is float32.
tf32: true
# Transformer architecture and dropout settings. Each two-entry list
# presumably holds the encoder value first and the decoder value second —
# TODO confirm the order. Both sides use identical values throughout.
transformer_activation_type:
- relu
- relu
transformer_attention_heads:
- 8
- 8
transformer_block_prepended_cross_attention: false
# All three transformer dropout kinds are set to 0.2 on both sides.
transformer_dropout_act:
- 0.2
- 0.2
transformer_dropout_attention:
- 0.2
- 0.2
transformer_dropout_prepost:
- 0.2
- 0.2
transformer_feed_forward_num_hidden:
- 2048
- 2048
transformer_feed_forward_use_glu: false
transformer_model_size:
- 512
- 512
transformer_positional_embedding_type: fixed
# NOTE(review): the process-sequence codes presumably mean n = layer norm,
# d = dropout, r = residual connection — verify against the consuming tool.
transformer_postprocess:
- dr
- dr
transformer_preprocess:
- n
- n
update_interval: 1
use_cpu: false
# Validation data paths plus weight-decay, weight-tying and vocabulary
# count settings.
# NOTE(review): every validation path points into a `.../test/` directory.
# If that is the held-out test split, checkpoint selection and early
# stopping are being driven by test data — confirm this is intended.
# The paths are absolute and machine-specific.
validation_source: /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/source.txt
validation_source_factors: []
# `target_0.txt` is given as the validation target; `target_1.txt` to
# `target_4.txt` are listed as the four validation target factors.
validation_target: /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/target_0.txt
validation_target_factors:
- /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/target_1.txt
- /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/target_2.txt
- /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/target_3.txt
- /home/amoryo/sign-language/signwriting-translation/parallel-clean/spoken-to-signed/test/target_4.txt
weight_decay: 0.0
# NOTE(review): `trg_softmax` presumably ties the target embedding with the
# output layer only; `shared_vocab` elsewhere in this file is false.
weight_tying_type: trg_softmax
# Two-entry list, presumably source then target minimum word count —
# TODO confirm.
word_min_count:
- 1
- 1