sabbas commited on
Commit
90543d7
·
verified ·
1 Parent(s): f314c74

Upload folder using huggingface_hub

Browse files
Files changed (40) hide show
  1. checkpoint-1610/config.json +61 -0
  2. checkpoint-1610/generation_config.json +16 -0
  3. checkpoint-1610/model.safetensors +3 -0
  4. checkpoint-1610/optimizer.pt +3 -0
  5. checkpoint-1610/rng_state.pth +3 -0
  6. checkpoint-1610/scheduler.pt +3 -0
  7. checkpoint-1610/source.spm +0 -0
  8. checkpoint-1610/special_tokens_map.json +5 -0
  9. checkpoint-1610/target.spm +0 -0
  10. checkpoint-1610/tokenizer_config.json +39 -0
  11. checkpoint-1610/trainer_state.json +161 -0
  12. checkpoint-1610/training_args.bin +3 -0
  13. checkpoint-1610/vocab.json +0 -0
  14. checkpoint-1840/config.json +61 -0
  15. checkpoint-1840/generation_config.json +16 -0
  16. checkpoint-1840/model.safetensors +3 -0
  17. checkpoint-1840/optimizer.pt +3 -0
  18. checkpoint-1840/rng_state.pth +3 -0
  19. checkpoint-1840/scheduler.pt +3 -0
  20. checkpoint-1840/source.spm +0 -0
  21. checkpoint-1840/special_tokens_map.json +5 -0
  22. checkpoint-1840/target.spm +0 -0
  23. checkpoint-1840/tokenizer_config.json +39 -0
  24. checkpoint-1840/trainer_state.json +178 -0
  25. checkpoint-1840/training_args.bin +3 -0
  26. checkpoint-1840/vocab.json +0 -0
  27. checkpoint-2070/config.json +61 -0
  28. checkpoint-2070/generation_config.json +16 -0
  29. checkpoint-2070/model.safetensors +3 -0
  30. checkpoint-2070/optimizer.pt +3 -0
  31. checkpoint-2070/rng_state.pth +3 -0
  32. checkpoint-2070/scheduler.pt +3 -0
  33. checkpoint-2070/source.spm +0 -0
  34. checkpoint-2070/special_tokens_map.json +5 -0
  35. checkpoint-2070/target.spm +0 -0
  36. checkpoint-2070/tokenizer_config.json +39 -0
  37. checkpoint-2070/trainer_state.json +195 -0
  38. checkpoint-2070/training_args.bin +3 -0
  39. checkpoint-2070/vocab.json +0 -0
  40. test_metrics.json +1 -0
checkpoint-1610/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 62801
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 62801,
25
+ "decoder_vocab_size": 62802,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 62802,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 62801,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.42.4",
59
+ "use_cache": true,
60
+ "vocab_size": 62802
61
+ }
checkpoint-1610/generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 62801
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 62801,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 62801,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.42.4"
16
+ }
checkpoint-1610/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a99b0ef8c6daedf0454a3dd267ab56891cf738aeefabbaacac0846944621c1f7
3
+ size 305452744
checkpoint-1610/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca022c729816f000bd5682f71db79dfae70a5c6b83619da050c104153885a26f
3
+ size 610554131
checkpoint-1610/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1831c2d877434e2ca71aa111ed2cd0d4dae185bb839542f315a1c9b77b1470
3
+ size 14503
checkpoint-1610/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2acb05a53fc1d87f27dded8dd2e8510a49ccdf0200c96a7fe23969533b3977f7
3
+ size 623
checkpoint-1610/source.spm ADDED
Binary file (801 kB). View file
 
checkpoint-1610/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
checkpoint-1610/target.spm ADDED
Binary file (917 kB). View file
 
checkpoint-1610/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "62801": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "return_tensors": "pt",
33
+ "separate_vocabs": false,
34
+ "source_lang": "eng",
35
+ "sp_model_kwargs": {},
36
+ "target_lang": "ara",
37
+ "tokenizer_class": "MarianTokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-1610/trainer_state.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03832539543509483,
3
+ "best_model_checkpoint": "text2gloss_ar/checkpoint-1380",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1610,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 9.208166122436523,
14
+ "learning_rate": 1.933623188405797e-05,
15
+ "loss": 2.726,
16
+ "step": 230
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_char_bleu": 42.04703354604912,
21
+ "eval_loss": 0.8206332921981812,
22
+ "eval_runtime": 31.4529,
23
+ "eval_samples_per_second": 41.236,
24
+ "eval_steps_per_second": 0.668,
25
+ "eval_word_bleu": 24.85614940942985,
26
+ "step": 230
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 6.826977729797363,
31
+ "learning_rate": 1.8669565217391307e-05,
32
+ "loss": 0.6983,
33
+ "step": 460
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_char_bleu": 74.73746769123755,
38
+ "eval_loss": 0.31659388542175293,
39
+ "eval_runtime": 30.2056,
40
+ "eval_samples_per_second": 42.939,
41
+ "eval_steps_per_second": 0.695,
42
+ "eval_word_bleu": 61.86433916154899,
43
+ "step": 460
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 5.772645473480225,
48
+ "learning_rate": 1.800289855072464e-05,
49
+ "loss": 0.3167,
50
+ "step": 690
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_char_bleu": 92.15392118666509,
55
+ "eval_loss": 0.12875397503376007,
56
+ "eval_runtime": 29.9006,
57
+ "eval_samples_per_second": 43.377,
58
+ "eval_steps_per_second": 0.702,
59
+ "eval_word_bleu": 85.4787040239448,
60
+ "step": 690
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 5.245423793792725,
65
+ "learning_rate": 1.7336231884057974e-05,
66
+ "loss": 0.1599,
67
+ "step": 920
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_char_bleu": 97.20198228076768,
72
+ "eval_loss": 0.06993339955806732,
73
+ "eval_runtime": 29.0824,
74
+ "eval_samples_per_second": 44.597,
75
+ "eval_steps_per_second": 0.722,
76
+ "eval_word_bleu": 92.92866546086414,
77
+ "step": 920
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 4.459423542022705,
82
+ "learning_rate": 1.6669565217391305e-05,
83
+ "loss": 0.0971,
84
+ "step": 1150
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_char_bleu": 97.69668982414296,
89
+ "eval_loss": 0.05042978748679161,
90
+ "eval_runtime": 29.4348,
91
+ "eval_samples_per_second": 44.064,
92
+ "eval_steps_per_second": 0.713,
93
+ "eval_word_bleu": 94.6364324777254,
94
+ "step": 1150
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 2.5048043727874756,
99
+ "learning_rate": 1.600289855072464e-05,
100
+ "loss": 0.0626,
101
+ "step": 1380
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_char_bleu": 98.60004486244999,
106
+ "eval_loss": 0.03832539543509483,
107
+ "eval_runtime": 29.4835,
108
+ "eval_samples_per_second": 43.991,
109
+ "eval_steps_per_second": 0.712,
110
+ "eval_word_bleu": 96.34412173641856,
111
+ "step": 1380
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 2.593268871307373,
116
+ "learning_rate": 1.5336231884057972e-05,
117
+ "loss": 0.0507,
118
+ "step": 1610
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_char_bleu": 98.50276122630902,
123
+ "eval_loss": 0.03961858153343201,
124
+ "eval_runtime": 30.4579,
125
+ "eval_samples_per_second": 42.583,
126
+ "eval_steps_per_second": 0.689,
127
+ "eval_word_bleu": 95.94399159765237,
128
+ "step": 1610
129
+ }
130
+ ],
131
+ "logging_steps": 500,
132
+ "max_steps": 6900,
133
+ "num_input_tokens_seen": 0,
134
+ "num_train_epochs": 30,
135
+ "save_steps": 500,
136
+ "stateful_callbacks": {
137
+ "EarlyStoppingCallback": {
138
+ "args": {
139
+ "early_stopping_patience": 3,
140
+ "early_stopping_threshold": 0.01
141
+ },
142
+ "attributes": {
143
+ "early_stopping_patience_counter": 0
144
+ }
145
+ },
146
+ "TrainerControl": {
147
+ "args": {
148
+ "should_epoch_stop": false,
149
+ "should_evaluate": false,
150
+ "should_log": false,
151
+ "should_save": true,
152
+ "should_training_stop": false
153
+ },
154
+ "attributes": {}
155
+ }
156
+ },
157
+ "total_flos": 760319933349888.0,
158
+ "train_batch_size": 32,
159
+ "trial_name": null,
160
+ "trial_params": null
161
+ }
checkpoint-1610/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34aef40641c75ceb07f5ee7c8c3d1673038476484d7d3f2997edc701fec171a
3
+ size 4783
checkpoint-1610/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1840/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 62801
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 62801,
25
+ "decoder_vocab_size": 62802,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 62802,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 62801,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.42.4",
59
+ "use_cache": true,
60
+ "vocab_size": 62802
61
+ }
checkpoint-1840/generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 62801
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 62801,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 62801,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.42.4"
16
+ }
checkpoint-1840/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8c2156b6a0e74966574eca738ad4d678bd632128e17ee635740721088c424ec
3
+ size 305452744
checkpoint-1840/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2259dda591a3813adc3ad1bce99419ee83ed7c326636215ff205e82fe415990d
3
+ size 610554131
checkpoint-1840/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cbe311e94d9e3c283c82dbfd0ce147207e41af6a435c86141fc1de0ab7aaa29
3
+ size 14503
checkpoint-1840/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675fbf6145bcb29a1206d6d2480b10212fcfd60c69f02f75740831a8974aa74a
3
+ size 623
checkpoint-1840/source.spm ADDED
Binary file (801 kB). View file
 
checkpoint-1840/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
checkpoint-1840/target.spm ADDED
Binary file (917 kB). View file
 
checkpoint-1840/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "62801": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "return_tensors": "pt",
33
+ "separate_vocabs": false,
34
+ "source_lang": "eng",
35
+ "sp_model_kwargs": {},
36
+ "target_lang": "ara",
37
+ "tokenizer_class": "MarianTokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-1840/trainer_state.json ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03635687381029129,
3
+ "best_model_checkpoint": "text2gloss_ar/checkpoint-1840",
4
+ "epoch": 8.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1840,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 9.208166122436523,
14
+ "learning_rate": 1.933623188405797e-05,
15
+ "loss": 2.726,
16
+ "step": 230
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_char_bleu": 42.04703354604912,
21
+ "eval_loss": 0.8206332921981812,
22
+ "eval_runtime": 31.4529,
23
+ "eval_samples_per_second": 41.236,
24
+ "eval_steps_per_second": 0.668,
25
+ "eval_word_bleu": 24.85614940942985,
26
+ "step": 230
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 6.826977729797363,
31
+ "learning_rate": 1.8669565217391307e-05,
32
+ "loss": 0.6983,
33
+ "step": 460
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_char_bleu": 74.73746769123755,
38
+ "eval_loss": 0.31659388542175293,
39
+ "eval_runtime": 30.2056,
40
+ "eval_samples_per_second": 42.939,
41
+ "eval_steps_per_second": 0.695,
42
+ "eval_word_bleu": 61.86433916154899,
43
+ "step": 460
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 5.772645473480225,
48
+ "learning_rate": 1.800289855072464e-05,
49
+ "loss": 0.3167,
50
+ "step": 690
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_char_bleu": 92.15392118666509,
55
+ "eval_loss": 0.12875397503376007,
56
+ "eval_runtime": 29.9006,
57
+ "eval_samples_per_second": 43.377,
58
+ "eval_steps_per_second": 0.702,
59
+ "eval_word_bleu": 85.4787040239448,
60
+ "step": 690
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 5.245423793792725,
65
+ "learning_rate": 1.7336231884057974e-05,
66
+ "loss": 0.1599,
67
+ "step": 920
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_char_bleu": 97.20198228076768,
72
+ "eval_loss": 0.06993339955806732,
73
+ "eval_runtime": 29.0824,
74
+ "eval_samples_per_second": 44.597,
75
+ "eval_steps_per_second": 0.722,
76
+ "eval_word_bleu": 92.92866546086414,
77
+ "step": 920
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 4.459423542022705,
82
+ "learning_rate": 1.6669565217391305e-05,
83
+ "loss": 0.0971,
84
+ "step": 1150
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_char_bleu": 97.69668982414296,
89
+ "eval_loss": 0.05042978748679161,
90
+ "eval_runtime": 29.4348,
91
+ "eval_samples_per_second": 44.064,
92
+ "eval_steps_per_second": 0.713,
93
+ "eval_word_bleu": 94.6364324777254,
94
+ "step": 1150
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 2.5048043727874756,
99
+ "learning_rate": 1.600289855072464e-05,
100
+ "loss": 0.0626,
101
+ "step": 1380
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_char_bleu": 98.60004486244999,
106
+ "eval_loss": 0.03832539543509483,
107
+ "eval_runtime": 29.4835,
108
+ "eval_samples_per_second": 43.991,
109
+ "eval_steps_per_second": 0.712,
110
+ "eval_word_bleu": 96.34412173641856,
111
+ "step": 1380
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 2.593268871307373,
116
+ "learning_rate": 1.5336231884057972e-05,
117
+ "loss": 0.0507,
118
+ "step": 1610
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_char_bleu": 98.50276122630902,
123
+ "eval_loss": 0.03961858153343201,
124
+ "eval_runtime": 30.4579,
125
+ "eval_samples_per_second": 42.583,
126
+ "eval_steps_per_second": 0.689,
127
+ "eval_word_bleu": 95.94399159765237,
128
+ "step": 1610
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "grad_norm": 2.4942216873168945,
133
+ "learning_rate": 1.4669565217391306e-05,
134
+ "loss": 0.036,
135
+ "step": 1840
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_char_bleu": 98.39569733809864,
140
+ "eval_loss": 0.03635687381029129,
141
+ "eval_runtime": 30.2971,
142
+ "eval_samples_per_second": 42.809,
143
+ "eval_steps_per_second": 0.693,
144
+ "eval_word_bleu": 96.0035686359632,
145
+ "step": 1840
146
+ }
147
+ ],
148
+ "logging_steps": 500,
149
+ "max_steps": 6900,
150
+ "num_input_tokens_seen": 0,
151
+ "num_train_epochs": 30,
152
+ "save_steps": 500,
153
+ "stateful_callbacks": {
154
+ "EarlyStoppingCallback": {
155
+ "args": {
156
+ "early_stopping_patience": 3,
157
+ "early_stopping_threshold": 0.01
158
+ },
159
+ "attributes": {
160
+ "early_stopping_patience_counter": 0
161
+ }
162
+ },
163
+ "TrainerControl": {
164
+ "args": {
165
+ "should_epoch_stop": false,
166
+ "should_evaluate": false,
167
+ "should_log": false,
168
+ "should_save": true,
169
+ "should_training_stop": false
170
+ },
171
+ "attributes": {}
172
+ }
173
+ },
174
+ "total_flos": 869236968849408.0,
175
+ "train_batch_size": 32,
176
+ "trial_name": null,
177
+ "trial_params": null
178
+ }
checkpoint-1840/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34aef40641c75ceb07f5ee7c8c3d1673038476484d7d3f2997edc701fec171a
3
+ size 4783
checkpoint-1840/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2070/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 62801
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 62801,
25
+ "decoder_vocab_size": 62802,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 62802,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 62801,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.42.4",
59
+ "use_cache": true,
60
+ "vocab_size": 62802
61
+ }
checkpoint-2070/generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 62801
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 62801,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 62801,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.42.4"
16
+ }
checkpoint-2070/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfa91dfe245c06b42617223e922f3501645b8678fe3af6c60deb4a8117ae6f0
3
+ size 305452744
checkpoint-2070/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78928934974ccebb2c100a886238db461e4def7bfaa590cd5bed738de43515ab
3
+ size 610554131
checkpoint-2070/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df43f029a2fc1bb14a3582521ba694e61c93eadce6b8cc0cdbad2ff5763bdb31
3
+ size 14503
checkpoint-2070/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfd388feec0549579c96bd9e3714594160145d7a78ae750dea5c170bcad7332
3
+ size 623
checkpoint-2070/source.spm ADDED
Binary file (801 kB). View file
 
checkpoint-2070/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
checkpoint-2070/target.spm ADDED
Binary file (917 kB). View file
 
checkpoint-2070/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "62801": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "return_tensors": "pt",
33
+ "separate_vocabs": false,
34
+ "source_lang": "eng",
35
+ "sp_model_kwargs": {},
36
+ "target_lang": "ara",
37
+ "tokenizer_class": "MarianTokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-2070/trainer_state.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.030597062781453133,
3
+ "best_model_checkpoint": "text2gloss_ar/checkpoint-2070",
4
+ "epoch": 9.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2070,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 9.208166122436523,
14
+ "learning_rate": 1.933623188405797e-05,
15
+ "loss": 2.726,
16
+ "step": 230
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_char_bleu": 42.04703354604912,
21
+ "eval_loss": 0.8206332921981812,
22
+ "eval_runtime": 31.4529,
23
+ "eval_samples_per_second": 41.236,
24
+ "eval_steps_per_second": 0.668,
25
+ "eval_word_bleu": 24.85614940942985,
26
+ "step": 230
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 6.826977729797363,
31
+ "learning_rate": 1.8669565217391307e-05,
32
+ "loss": 0.6983,
33
+ "step": 460
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_char_bleu": 74.73746769123755,
38
+ "eval_loss": 0.31659388542175293,
39
+ "eval_runtime": 30.2056,
40
+ "eval_samples_per_second": 42.939,
41
+ "eval_steps_per_second": 0.695,
42
+ "eval_word_bleu": 61.86433916154899,
43
+ "step": 460
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 5.772645473480225,
48
+ "learning_rate": 1.800289855072464e-05,
49
+ "loss": 0.3167,
50
+ "step": 690
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_char_bleu": 92.15392118666509,
55
+ "eval_loss": 0.12875397503376007,
56
+ "eval_runtime": 29.9006,
57
+ "eval_samples_per_second": 43.377,
58
+ "eval_steps_per_second": 0.702,
59
+ "eval_word_bleu": 85.4787040239448,
60
+ "step": 690
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 5.245423793792725,
65
+ "learning_rate": 1.7336231884057974e-05,
66
+ "loss": 0.1599,
67
+ "step": 920
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_char_bleu": 97.20198228076768,
72
+ "eval_loss": 0.06993339955806732,
73
+ "eval_runtime": 29.0824,
74
+ "eval_samples_per_second": 44.597,
75
+ "eval_steps_per_second": 0.722,
76
+ "eval_word_bleu": 92.92866546086414,
77
+ "step": 920
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 4.459423542022705,
82
+ "learning_rate": 1.6669565217391305e-05,
83
+ "loss": 0.0971,
84
+ "step": 1150
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_char_bleu": 97.69668982414296,
89
+ "eval_loss": 0.05042978748679161,
90
+ "eval_runtime": 29.4348,
91
+ "eval_samples_per_second": 44.064,
92
+ "eval_steps_per_second": 0.713,
93
+ "eval_word_bleu": 94.6364324777254,
94
+ "step": 1150
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 2.5048043727874756,
99
+ "learning_rate": 1.600289855072464e-05,
100
+ "loss": 0.0626,
101
+ "step": 1380
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_char_bleu": 98.60004486244999,
106
+ "eval_loss": 0.03832539543509483,
107
+ "eval_runtime": 29.4835,
108
+ "eval_samples_per_second": 43.991,
109
+ "eval_steps_per_second": 0.712,
110
+ "eval_word_bleu": 96.34412173641856,
111
+ "step": 1380
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 2.593268871307373,
116
+ "learning_rate": 1.5336231884057972e-05,
117
+ "loss": 0.0507,
118
+ "step": 1610
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_char_bleu": 98.50276122630902,
123
+ "eval_loss": 0.03961858153343201,
124
+ "eval_runtime": 30.4579,
125
+ "eval_samples_per_second": 42.583,
126
+ "eval_steps_per_second": 0.689,
127
+ "eval_word_bleu": 95.94399159765237,
128
+ "step": 1610
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "grad_norm": 2.4942216873168945,
133
+ "learning_rate": 1.4669565217391306e-05,
134
+ "loss": 0.036,
135
+ "step": 1840
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_char_bleu": 98.39569733809864,
140
+ "eval_loss": 0.03635687381029129,
141
+ "eval_runtime": 30.2971,
142
+ "eval_samples_per_second": 42.809,
143
+ "eval_steps_per_second": 0.693,
144
+ "eval_word_bleu": 96.0035686359632,
145
+ "step": 1840
146
+ },
147
+ {
148
+ "epoch": 9.0,
149
+ "grad_norm": 3.2667160034179688,
150
+ "learning_rate": 1.400289855072464e-05,
151
+ "loss": 0.0289,
152
+ "step": 2070
153
+ },
154
+ {
155
+ "epoch": 9.0,
156
+ "eval_char_bleu": 98.93912433138856,
157
+ "eval_loss": 0.030597062781453133,
158
+ "eval_runtime": 30.2718,
159
+ "eval_samples_per_second": 42.845,
160
+ "eval_steps_per_second": 0.694,
161
+ "eval_word_bleu": 97.08305126864244,
162
+ "step": 2070
163
+ }
164
+ ],
165
+ "logging_steps": 500,
166
+ "max_steps": 6900,
167
+ "num_input_tokens_seen": 0,
168
+ "num_train_epochs": 30,
169
+ "save_steps": 500,
170
+ "stateful_callbacks": {
171
+ "EarlyStoppingCallback": {
172
+ "args": {
173
+ "early_stopping_patience": 3,
174
+ "early_stopping_threshold": 0.01
175
+ },
176
+ "attributes": {
177
+ "early_stopping_patience_counter": 0
178
+ }
179
+ },
180
+ "TrainerControl": {
181
+ "args": {
182
+ "should_epoch_stop": false,
183
+ "should_evaluate": false,
184
+ "should_log": false,
185
+ "should_save": true,
186
+ "should_training_stop": true
187
+ },
188
+ "attributes": {}
189
+ }
190
+ },
191
+ "total_flos": 978040391860224.0,
192
+ "train_batch_size": 32,
193
+ "trial_name": null,
194
+ "trial_params": null
195
+ }
checkpoint-2070/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34aef40641c75ceb07f5ee7c8c3d1673038476484d7d3f2997edc701fec171a
3
+ size 4783
checkpoint-2070/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
test_metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"test_loss": 0.28931236267089844, "test_word_bleu": 76.91704413218343, "test_char_bleu": 86.30489121528109, "test_runtime": 1.1051, "test_samples_per_second": 41.626, "test_steps_per_second": 0.905}