sign
/

AmitMY committed on
Commit
4d1b0cf
·
1 Parent(s): 8b1dd23

Upload args.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. args.yaml +151 -0
args.yaml ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ allow_missing_params: false
2
+ amp: false
3
+ apex_amp: false
4
+ batch_sentences_multiple_of: 8
5
+ batch_size: 512
6
+ batch_type: word
7
+ bow_task_pos_weight: 10
8
+ bow_task_weight: 1.0
9
+ bucket_scaling: false
10
+ bucket_width: 8
11
+ cache_last_best_params: 0
12
+ cache_metric: perplexity
13
+ cache_strategy: best
14
+ checkpoint_improvement_threshold: 0.0
15
+ checkpoint_interval: 4000
16
+ clamp_to_dtype: false
17
+ config: null
18
+ decode_and_evaluate: 500
19
+ decoder: transformer
20
+ deepspeed_bf16: false
21
+ deepspeed_fp16: false
22
+ device_id: 0
23
+ dist: false
24
+ dry_run: false
25
+ dtype: float32
26
+ embed_dropout:
27
+ - 0.0
28
+ - 0.0
29
+ encoder: transformer
30
+ end_of_prepending_tag: null
31
+ env: null
32
+ fixed_param_names: []
33
+ fixed_param_strategy: null
34
+ gradient_clipping_threshold: 1.0
35
+ gradient_clipping_type: none
36
+ ignore_extra_params: false
37
+ initial_learning_rate: 0.0002
38
+ keep_initializations: false
39
+ keep_last_params: -1
40
+ label_smoothing: 0.1
41
+ label_smoothing_impl: mxnet
42
+ learning_rate_reduce_factor: 0.9
43
+ learning_rate_reduce_num_not_improved: 8
44
+ learning_rate_scheduler_type: plateau-reduce
45
+ learning_rate_warmup: 0
46
+ length_task: null
47
+ length_task_layers: 1
48
+ length_task_weight: 1.0
49
+ lhuc: null
50
+ local_rank: null
51
+ loglevel: INFO
52
+ loglevel_secondary_workers: INFO
53
+ max_checkpoints: null
54
+ max_num_checkpoint_not_improved: 20
55
+ max_num_epochs: null
56
+ max_samples: null
57
+ max_seconds: null
58
+ max_seq_len:
59
+ - 512
60
+ - 128
61
+ max_updates: null
62
+ min_num_epochs: null
63
+ min_samples: null
64
+ min_updates: null
65
+ momentum: 0.0
66
+ neural_vocab_selection: null
67
+ neural_vocab_selection_block_loss: false
68
+ no_bucketing: false
69
+ no_logfile: false
70
+ no_reload_on_learning_rate_reduce: false
71
+ num_embed:
72
+ - null
73
+ - null
74
+ num_layers:
75
+ - 6
76
+ - 6
77
+ num_words:
78
+ - 0
79
+ - 0
80
+ optimized_metric: chrf
81
+ optimizer: adam
82
+ optimizer_betas:
83
+ - 0.9
84
+ - 0.999
85
+ optimizer_eps: 1.0e-08
86
+ output: /shares/volk.cl.uzh/amoryo/checkpoints/sockeye/expanded-cleaned/model
87
+ overwrite_output: false
88
+ pad_vocab_to_multiple_of: 8
89
+ params: /shares/volk.cl.uzh/amoryo/checkpoints/sockeye/expanded/model/params.best
90
+ prepared_data: /shares/volk.cl.uzh/amoryo/checkpoints/sockeye/expanded-cleaned/train_data
91
+ quiet: false
92
+ quiet_secondary_workers: false
93
+ seed: 1
94
+ shared_vocab: false
95
+ source: null
96
+ source_factor_vocabs: []
97
+ source_factors: []
98
+ source_factors_combine: []
99
+ source_factors_num_embed: []
100
+ source_factors_share_embedding: []
101
+ source_factors_use_source_vocab: []
102
+ source_vocab: null
103
+ stop_training_on_decoder_failure: false
104
+ target: null
105
+ target_factor_vocabs: []
106
+ target_factors: []
107
+ target_factors_combine: []
108
+ target_factors_num_embed: []
109
+ target_factors_share_embedding: []
110
+ target_factors_use_target_vocab: []
111
+ target_factors_weight:
112
+ - 1.0
113
+ target_vocab: null
114
+ tf32: true
115
+ transformer_activation_type:
116
+ - relu
117
+ - relu
118
+ transformer_attention_heads:
119
+ - 8
120
+ - 8
121
+ transformer_block_prepended_cross_attention: false
122
+ transformer_dropout_act: &id001
123
+ - 0.1
124
+ - 0.1
125
+ transformer_dropout_attention: *id001
126
+ transformer_dropout_prepost: *id001
127
+ transformer_feed_forward_num_hidden:
128
+ - 2048
129
+ - 2048
130
+ transformer_feed_forward_use_glu: false
131
+ transformer_model_size:
132
+ - 512
133
+ - 512
134
+ transformer_positional_embedding_type: fixed
135
+ transformer_postprocess:
136
+ - dr
137
+ - dr
138
+ transformer_preprocess:
139
+ - n
140
+ - n
141
+ update_interval: 1
142
+ use_cpu: false
143
+ validation_source: /home/amoryo/sign-language/signbank-annotation/signbank-plus/data/parallel/cleaned/dev.source.tokenized
144
+ validation_source_factors: []
145
+ validation_target: /shares/volk.cl.uzh/amoryo/checkpoints/sockeye/expanded-cleaned/dev.target.bpe
146
+ validation_target_factors: []
147
+ weight_decay: 0.0
148
+ weight_tying_type: none
149
+ word_min_count:
150
+ - 1
151
+ - 1