haukurpj committed on
Commit
25fcbb7
1 Parent(s): f4a9969
.gitattributes CHANGED
@@ -30,3 +30,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mBART based translation model
2
+ - model_path: /data/scratch/haukur/document_translation/checkpoints/v2.all.wordnoise0.06.fragmentnoise0.06.lr8e-06.dropout0.1.spmalpha0.7.seed228/checkpoint_best.pt
3
+ - num_updates: 14500
4
+ - Source language: en_XX
5
+ - Target language: is_IS
6
+
7
+ ---
8
+ tags:
9
+ - translation
10
+ inference:
11
+ parameters:
12
+ src_lang: "en_XX"
13
+ tgt_lang: "is_IS"
14
+ decoder_start_token_id: 250004
15
+ max_length: 512
16
+ widget:
17
+ - text: "I should buy a boat."
18
+ language:
19
+ - en
20
+ - is
21
+ datasets:
22
+ - train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia,train.abstracts,train.studentabladid,train.jw300,train.rannsoknarskyrsla_althingis,train.eea,train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia,train.bible
23
+ ---
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "architectures": [
5
+ "MBartForConditionalGeneration"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": 0.0,
10
+ "d_model": 1024,
11
+ "decoder_attention_heads": 16,
12
+ "decoder_ffn_dim": 4096,
13
+ "decoder_layerdrop": 0.0,
14
+ "decoder_layers": 12,
15
+ "dropout": 0.1,
16
+ "encoder_attention_heads": 16,
17
+ "encoder_ffn_dim": 4096,
18
+ "encoder_layerdrop": 0.0,
19
+ "encoder_layers": 12,
20
+ "eos_token_id": 2,
21
+ "forced_eos_token_id": 2,
22
+ "init_std": 0.02,
23
+ "is_encoder_decoder": true,
24
+ "max_length": 1024,
25
+ "max_position_embeddings": 1024,
26
+ "model_type": "mbart",
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 1,
29
+ "scale_embedding": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.22.2",
32
+ "use_cache": true,
33
+ "vocab_size": 250028
34
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c6e0004cfeae43578953872d3e16d483c7a4778241e7afce097cb4a03012cc
3
+ size 2444576505
special_tokens_map.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "is_IS",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN"
28
+ ],
29
+ "bos_token": "<s>",
30
+ "cls_token": "<s>",
31
+ "eos_token": "</s>",
32
+ "mask_token": {
33
+ "content": "<mask>",
34
+ "lstrip": true,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ },
39
+ "pad_token": "<pad>",
40
+ "sep_token": "</s>",
41
+ "unk_token": "<unk>"
42
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbbe4d89302672c9495118054f09c1a6e24c908b3a97866cbfc7a3bbb2aa14c
3
+ size 17088357
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "model_max_length": 1024,
15
+ "name_or_path": "mideind/tokenizer-mbart-25-enis",
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "special_tokens_map_file": null,
19
+ "src_lang": "en_XX",
20
+ "tgt_lang": "is_IS",
21
+ "tokenizer_class": "MBartTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
train_args.json ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "no_progress_bar": true,
3
+ "log_interval": 5,
4
+ "log_format": "simple",
5
+ "tensorboard_logdir": "/data/scratch/haukur/document_translation/tensorboard.logs/v2.all.wordnoise0.06.fragmentnoise0.06.lr8e-06.dropout0.1.spmalpha0.7.seed228",
6
+ "seed": 228,
7
+ "cpu": false,
8
+ "tpu": false,
9
+ "bf16": false,
10
+ "memory_efficient_bf16": false,
11
+ "fp16": true,
12
+ "memory_efficient_fp16": false,
13
+ "fp16_no_flatten_grads": false,
14
+ "fp16_init_scale": 128,
15
+ "fp16_scale_window": null,
16
+ "fp16_scale_tolerance": 0.0,
17
+ "min_loss_scale": 0.0001,
18
+ "threshold_loss_scale": null,
19
+ "user_dir": "/data/scratch/haukur/document_translation/fairseq_user_dir",
20
+ "empty_cache_freq": 0,
21
+ "all_gather_list_size": 16384,
22
+ "model_parallel_size": 1,
23
+ "checkpoint_suffix": "",
24
+ "checkpoint_shard_count": 1,
25
+ "quantization_config_path": null,
26
+ "profile": false,
27
+ "criterion": "cross_entropy",
28
+ "optimizer": "adam",
29
+ "lr_scheduler": "polynomial_decay",
30
+ "tokenizer": null,
31
+ "bpe": "sentencepiece",
32
+ "scoring": "bleu",
33
+ "task": "document_translation_from_pretrained_bart",
34
+ "num_workers": 8,
35
+ "skip_invalid_size_inputs_valid_test": true,
36
+ "max_tokens": 2000,
37
+ "batch_size": null,
38
+ "required_batch_size_multiple": 8,
39
+ "required_seq_len_multiple": 1,
40
+ "dataset_impl": null,
41
+ "data_buffer_size": 10,
42
+ "train_subset": "train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia,train.abstracts,train.studentabladid,train.jw300,train.rannsoknarskyrsla_althingis,train.eea,train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia,train.bible",
43
+ "valid_subset": "valid.newsdev2021,valid.flores_dev",
44
+ "validate_interval": 1,
45
+ "validate_interval_updates": 500,
46
+ "validate_after_updates": 0,
47
+ "fixed_validation_seed": null,
48
+ "disable_validation": false,
49
+ "max_tokens_valid": 2000,
50
+ "batch_size_valid": null,
51
+ "curriculum": 0,
52
+ "gen_subset": "test",
53
+ "num_shards": 1,
54
+ "shard_id": 0,
55
+ "distributed_world_size": 2,
56
+ "distributed_rank": 0,
57
+ "distributed_backend": "nccl",
58
+ "distributed_init_method": "tcp://localhost:18931",
59
+ "distributed_port": -1,
60
+ "device_id": 0,
61
+ "distributed_no_spawn": false,
62
+ "ddp_backend": "no_c10d",
63
+ "bucket_cap_mb": 25,
64
+ "fix_batches_to_gpus": false,
65
+ "find_unused_parameters": false,
66
+ "fast_stat_sync": false,
67
+ "broadcast_buffers": false,
68
+ "distributed_wrapper": "DDP",
69
+ "slowmo_momentum": null,
70
+ "slowmo_algorithm": "LocalSGD",
71
+ "localsgd_frequency": 3,
72
+ "nprocs_per_node": 2,
73
+ "pipeline_model_parallel": false,
74
+ "pipeline_balance": null,
75
+ "pipeline_devices": null,
76
+ "pipeline_chunks": 0,
77
+ "pipeline_encoder_balance": null,
78
+ "pipeline_encoder_devices": null,
79
+ "pipeline_decoder_balance": null,
80
+ "pipeline_decoder_devices": null,
81
+ "pipeline_checkpoint": "never",
82
+ "zero_sharding": "none",
83
+ "arch": "mbart_large",
84
+ "max_epoch": 0,
85
+ "max_update": 15000,
86
+ "stop_time_hours": 0,
87
+ "clip_norm": 3.0,
88
+ "sentence_avg": false,
89
+ "update_freq": [
90
+ 10
91
+ ],
92
+ "lr": [
93
+ 8e-06
94
+ ],
95
+ "min_lr": -1.0,
96
+ "use_bmuf": false,
97
+ "save_dir": "/data/scratch/haukur/document_translation/checkpoints/v2.all.wordnoise0.06.fragmentnoise0.06.lr8e-06.dropout0.1.spmalpha0.7.seed228",
98
+ "restore_file": "/data/scratch/haukur/document_translation/checkpoints/all.wordnoise0.06.fragmentnoise0.06.lr1e-05.dropout0.1.spmalpha0.7.seed226/checkpoint_best.pt",
99
+ "finetune_from_model": null,
100
+ "reset_dataloader": true,
101
+ "reset_lr_scheduler": true,
102
+ "reset_meters": true,
103
+ "reset_optimizer": true,
104
+ "optimizer_overrides": "{}",
105
+ "save_interval": 1,
106
+ "save_interval_updates": 500,
107
+ "keep_interval_updates": 2,
108
+ "keep_last_epochs": -1,
109
+ "keep_best_checkpoints": -1,
110
+ "no_save": false,
111
+ "no_epoch_checkpoints": false,
112
+ "no_last_checkpoints": false,
113
+ "no_save_optimizer_state": false,
114
+ "best_checkpoint_metric": "ppl",
115
+ "maximize_best_checkpoint_metric": false,
116
+ "patience": 10,
117
+ "no_token_positional_embeddings": false,
118
+ "no_cross_attention": false,
119
+ "cross_self_attention": false,
120
+ "encoder_layerdrop": 0,
121
+ "decoder_layerdrop": 0,
122
+ "encoder_layers_to_keep": null,
123
+ "decoder_layers_to_keep": null,
124
+ "quant_noise_pq": 0,
125
+ "quant_noise_pq_block_size": 8,
126
+ "quant_noise_scalar": 0,
127
+ "adam_betas": "(0.9, 0.98)",
128
+ "adam_eps": 1e-06,
129
+ "weight_decay": 0.01,
130
+ "use_old_adam": false,
131
+ "force_anneal": null,
132
+ "warmup_updates": 300,
133
+ "end_learning_rate": 0.0,
134
+ "power": 1.0,
135
+ "total_num_update": 1000000,
136
+ "sentencepiece_model": "/data/models/mbart25-cont-enis/sentence.bpe.model",
137
+ "data": "/data/scratch/haukur/document_translation/data",
138
+ "source_lang": "en_XX",
139
+ "target_lang": "is_IS",
140
+ "load_alignments": false,
141
+ "left_pad_source": true,
142
+ "left_pad_target": false,
143
+ "max_source_positions": 1024,
144
+ "max_target_positions": 1024,
145
+ "upsample_primary": 1,
146
+ "truncate_source": false,
147
+ "num_batch_buckets": 0,
148
+ "eval_bleu": false,
149
+ "eval_bleu_detok": "space",
150
+ "eval_bleu_detok_args": null,
151
+ "eval_tokenized_bleu": false,
152
+ "eval_bleu_remove_bpe": null,
153
+ "eval_bleu_args": null,
154
+ "eval_bleu_print_samples": false,
155
+ "langs": "ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,is_IS,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN",
156
+ "prepend_bos": false,
157
+ "max_sentences": 300,
158
+ "bt_subset": "train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia",
159
+ "align_subset": "train.abstracts,train.studentabladid,train.jw300,train.rannsoknarskyrsla_althingis,train.eea,train.fornsogur,train.greynir_articles,train.greynir_articles_2021,train.hirslan,train.ic3_filtered,train.rafbokavefurinn,train.rmh_filtered,train.wikipedia,train.bible",
160
+ "sentencepiece_alpha": 1.0,
161
+ "parallel_prob": 0.33,
162
+ "word_noise_prob": 0.06,
163
+ "fragment_noise_prob": 0.06,
164
+ "max_shuffle_dist": 3,
165
+ "encoder_normalize_before": true,
166
+ "decoder_normalize_before": true,
167
+ "layernorm_embedding": true,
168
+ "dropout": 0.1,
169
+ "attention_dropout": 0.1,
170
+ "no_seed_provided": false,
171
+ "no_scale_embedding": false,
172
+ "encoder_embed_path": null,
173
+ "encoder_embed_dim": 1024,
174
+ "encoder_ffn_embed_dim": 4096,
175
+ "encoder_layers": 12,
176
+ "encoder_attention_heads": 16,
177
+ "encoder_learned_pos": true,
178
+ "decoder_embed_path": null,
179
+ "decoder_embed_dim": 1024,
180
+ "decoder_ffn_embed_dim": 4096,
181
+ "decoder_layers": 12,
182
+ "decoder_attention_heads": 16,
183
+ "decoder_learned_pos": true,
184
+ "relu_dropout": 0.0,
185
+ "adaptive_softmax_cutoff": null,
186
+ "adaptive_softmax_dropout": 0,
187
+ "share_decoder_input_output_embed": true,
188
+ "share_all_embeddings": true,
189
+ "decoder_output_dim": 1024,
190
+ "decoder_input_dim": 1024,
191
+ "activation_fn": "gelu",
192
+ "pooler_activation_fn": "tanh",
193
+ "pooler_dropout": 0.0,
194
+ "distributed_num_procs": 2,
195
+ "activation_dropout": 0.0,
196
+ "adaptive_input": false,
197
+ "tie_adaptive_weights": false
198
+ }