desi6ner committed
Commit 561e87c
1 Parent(s): 6311f6c

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,5 +1,6 @@
 {
-  "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
+  "_name_or_path": "Helsinki-NLP/opus-mt-ar-en",
+  "_num_labels": 3,
   "activation_dropout": 0.0,
   "activation_function": "swish",
   "add_bias_logits": false,
@@ -10,7 +11,7 @@
   "attention_dropout": 0.0,
   "bad_words_ids": [
     [
-      62801
+      62833
     ]
   ],
   "bos_token_id": 0,
@@ -21,15 +22,14 @@
   "decoder_ffn_dim": 2048,
   "decoder_layerdrop": 0.0,
   "decoder_layers": 6,
-  "decoder_start_token_id": 62801,
-  "decoder_vocab_size": 62802,
+  "decoder_start_token_id": 62833,
+  "decoder_vocab_size": 62834,
   "dropout": 0.1,
   "encoder_attention_heads": 8,
   "encoder_ffn_dim": 2048,
   "encoder_layerdrop": 0.0,
   "encoder_layers": 6,
   "eos_token_id": 0,
-  "extra_pos_embeddings": 62802,
   "forced_eos_token_id": 0,
   "id2label": {
     "0": "LABEL_0",
@@ -50,12 +50,12 @@
   "normalize_embedding": false,
   "num_beams": 4,
   "num_hidden_layers": 6,
-  "pad_token_id": 62801,
+  "pad_token_id": 62833,
   "scale_embedding": true,
   "share_encoder_decoder_embeddings": true,
   "static_position_embeddings": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.39.0.dev0",
+  "transformers_version": "4.40.0.dev0",
   "use_cache": true,
-  "vocab_size": 62802
+  "vocab_size": 62834
 }

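Taken together, the config.json changes swap the base checkpoint from Helsinki-NLP/opus-mt-en-ar to Helsinki-NLP/opus-mt-ar-en, grow the vocabulary from 62802 to 62834 entries, and move the pad/decoder-start token id from 62801 to 62833. A minimal loading sketch that checks those values is below; the repository id is a placeholder, since the commit itself does not name the repo.

# Minimal sketch: load the updated Arabic-to-English checkpoint described by
# the new config.json. The repo id is a placeholder (not shown in this commit).
from transformers import MarianMTModel, MarianTokenizer

repo_id = "path/to/this-checkpoint"  # placeholder

tokenizer = MarianTokenizer.from_pretrained(repo_id)
model = MarianMTModel.from_pretrained(repo_id)

# Values taken from the new config.json above.
assert model.config.vocab_size == 62834
assert model.config.pad_token_id == 62833
assert model.config.decoder_start_token_id == 62833
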
generation_config.json CHANGED
@@ -1,16 +1,16 @@
 {
   "bad_words_ids": [
     [
-      62801
+      62833
     ]
   ],
   "bos_token_id": 0,
-  "decoder_start_token_id": 62801,
+  "decoder_start_token_id": 62833,
   "eos_token_id": 0,
   "forced_eos_token_id": 0,
   "max_length": 512,
   "num_beams": 4,
-  "pad_token_id": 62801,
+  "pad_token_id": 62833,
   "renormalize_logits": true,
-  "transformers_version": "4.39.0.dev0"
+  "transformers_version": "4.40.0.dev0"
 }

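generation_config.json keeps the same decoding defaults (4-beam search, max_length 512, renormalize_logits, and the pad token banned via bad_words_ids); only the special-token ids move to 62833. Continuing the loading sketch above, generate() picks these defaults up automatically:

# Hedged inference sketch, continuing from the loading example above.
# generate() reads num_beams=4 and max_length=512 from generation_config.json.
text = "مرحبا بالعالم"  # sample Arabic input for the ar->en direction
batch = tokenizer([text], return_tensors="pt", padding=True)
output_ids = model.generate(**batch)
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))
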
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:876fdabd8de9b8a1552a39a5ab70a384fb6299b3fb723e27aae9d8bf5f5aaecc
-size 305452744
+oid sha256:841061ba1ca3134e21bfcc1c1c1895510fa2bd0a169a26c331af414c02f2097e
+size 305518408

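The new model.safetensors is 65664 bytes larger (305518408 - 305452744), which lines up with the 32 extra vocabulary entries (62834 - 62802) if one assumes the usual Marian layout: a shared 512-dimensional float32 embedding matrix plus one final_logits_bias entry per token. A back-of-the-envelope check:

# Rough size check (assumes d_model = 512 and Marian's shared-embedding plus
# final_logits_bias layout; neither is stated explicitly in this diff).
old_size, new_size = 305_452_744, 305_518_408
added_tokens = 62_834 - 62_802            # 32 new vocabulary entries

embedding_bytes = added_tokens * 512 * 4  # new float32 embedding rows
bias_bytes = added_tokens * 4             # new final_logits_bias entries

print(new_size - old_size)                # 65664
print(embedding_bytes + bias_bytes)       # 65664
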
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e00695810b63e4d0d0bc59d08b245bb4cf962172f04a4261c6f5bff21d68be4d
-size 610556538
+oid sha256:c5045d77adc6f5d5e5358a28f934868700fcd9ebb529ddb2dfcff446fa3f07b7
+size 610687610

rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b544591965cfbdb044fe4698d517bdab9ae3a234ad953cbe6f274cdccacaff5
+oid sha256:28958b8a49074d60dae4c819a3294a6ed04762bd423b4401526de28ec6641877
 size 14244

scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81c27dd7e7913173594e575e6690cdc3d1efab5f3f899a296c53fb607c1cf521
+oid sha256:f0c19b22a8c3caaf5a630137d937fd728638cfd0893ce6477cf5def613cd1cae
 size 1064

source.spm CHANGED
Binary files a/source.spm and b/source.spm differ
 
target.spm CHANGED
Binary files a/target.spm and b/target.spm differ
 
tokenizer_config.json CHANGED
@@ -16,7 +16,7 @@
       "single_word": false,
       "special": true
     },
-    "62801": {
+    "62833": {
       "content": "<pad>",
       "lstrip": false,
       "normalized": false,
@@ -30,9 +30,9 @@
   "model_max_length": 512,
   "pad_token": "<pad>",
   "separate_vocabs": false,
-  "source_lang": "eng",
+  "source_lang": "ar",
   "sp_model_kwargs": {},
-  "target_lang": "ara",
+  "target_lang": "en",
   "tokenizer_class": "MarianTokenizer",
   "unk_token": "<unk>"
 }

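tokenizer_config.json flips the language pair from eng/ara to ar/en and re-registers the <pad> token under its new id 62833, consistent with the updated source.spm and target.spm files above. A small check, continuing the earlier sketch:

# Sketch: the reloaded tokenizer should reflect the new direction and pad id.
print(tokenizer.source_lang, tokenizer.target_lang)  # expected: ar en
print(tokenizer.convert_tokens_to_ids("<pad>"))      # expected: 62833
print(tokenizer.model_max_length)                    # expected: 512
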
trainer_state.json CHANGED
@@ -1,27 +1,146 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.824858757062147,
+  "epoch": 2.860775588048315,
   "eval_steps": 500,
-  "global_step": 500,
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 2.82,
-      "grad_norm": 3.9365735054016113,
-      "learning_rate": 2.919020715630885e-06,
-      "loss": 0.1939,
+      "epoch": 0.16,
+      "grad_norm": 18.022613525390625,
+      "learning_rate": 4.735113371477008e-05,
+      "loss": 2.3719,
       "step": 500
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 21.23277473449707,
+      "learning_rate": 4.4702267429540156e-05,
+      "loss": 2.0069,
+      "step": 1000
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 16.839088439941406,
+      "learning_rate": 4.205340114431024e-05,
+      "loss": 1.8155,
+      "step": 1500
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 15.446898460388184,
+      "learning_rate": 3.9404534859080315e-05,
+      "loss": 1.7476,
+      "step": 2000
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 24.382474899291992,
+      "learning_rate": 3.675566857385039e-05,
+      "loss": 1.6636,
+      "step": 2500
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 21.278900146484375,
+      "learning_rate": 3.410680228862047e-05,
+      "loss": 1.624,
+      "step": 3000
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 14.305024147033691,
+      "learning_rate": 3.1457936003390545e-05,
+      "loss": 1.2214,
+      "step": 3500
+    },
+    {
+      "epoch": 1.27,
+      "grad_norm": 13.627291679382324,
+      "learning_rate": 2.8809069718160632e-05,
+      "loss": 1.0202,
+      "step": 4000
+    },
+    {
+      "epoch": 1.43,
+      "grad_norm": 11.309160232543945,
+      "learning_rate": 2.616020343293071e-05,
+      "loss": 1.0545,
+      "step": 4500
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 12.876075744628906,
+      "learning_rate": 2.3511337147700785e-05,
+      "loss": 1.0358,
+      "step": 5000
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 11.487010955810547,
+      "learning_rate": 2.0862470862470865e-05,
+      "loss": 1.0278,
+      "step": 5500
+    },
+    {
+      "epoch": 1.91,
+      "grad_norm": 12.434341430664062,
+      "learning_rate": 1.821360457724094e-05,
+      "loss": 1.0069,
+      "step": 6000
+    },
+    {
+      "epoch": 2.07,
+      "grad_norm": 11.067166328430176,
+      "learning_rate": 1.5564738292011018e-05,
+      "loss": 0.8587,
+      "step": 6500
+    },
+    {
+      "epoch": 2.23,
+      "grad_norm": 9.134782791137695,
+      "learning_rate": 1.29158720067811e-05,
+      "loss": 0.6709,
+      "step": 7000
+    },
+    {
+      "epoch": 2.38,
+      "grad_norm": 12.168549537658691,
+      "learning_rate": 1.0267005721551176e-05,
+      "loss": 0.6534,
+      "step": 7500
+    },
+    {
+      "epoch": 2.54,
+      "grad_norm": 8.282147407531738,
+      "learning_rate": 7.6181394363212545e-06,
+      "loss": 0.6821,
+      "step": 8000
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 22.135112762451172,
+      "learning_rate": 4.9692731510913336e-06,
+      "loss": 0.671,
+      "step": 8500
+    },
+    {
+      "epoch": 2.86,
+      "grad_norm": 7.933387756347656,
+      "learning_rate": 2.3204068658614114e-06,
+      "loss": 0.669,
+      "step": 9000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 531,
+  "max_steps": 9438,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 6335551438848.0,
+  "total_flos": 184716162957312.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da2d8a8c3cce0b00111924516d0009d42009475d9cc4facd034884be6d737922
+oid sha256:ec3d61400a7fee3acf0865b303ca025c1aa50ee1eb181cdf52416963ef7e88a0
 size 5048

vocab.json CHANGED
The diff for this file is too large to render.