Trabis committed on
Commit
8223eec
1 Parent(s): 05cc646

Update readme

Browse files
Files changed (1) hide show
  1. README.md +64 -2
README.md CHANGED
@@ -12,6 +12,12 @@ pipeline_tag: translation
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
 
 
 
 
 
 
15
  # Helsinki-NLPopus-mt-tc-big-en-moroccain_dialect
16
 
17
  This model was trained from scratch on the None dataset.
@@ -22,7 +28,52 @@ It achieves the following results on the evaluation set:
22
 
23
  ## Model description
24
 
25
- More information needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  ## Intended uses & limitations
28
 
@@ -30,10 +81,21 @@ More information needed
30
 
31
  ## Training and evaluation data
32
 
33
- More information needed
 
 
 
 
 
 
 
 
 
34
 
35
  ## Training procedure
36
 
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
+ <!-- In this model I use transfer learning to translate English to Moroccan dialect (Darija). -->
16
+
17
+ <!-- About the dataset used for training the model: I used about 18,000 pairs of English and Moroccan Dialect sentences. -->
18
+
19
+ <!-- My model was trained three times, the last run being a single epoch. -->
20
+
21
  # Helsinki-NLPopus-mt-tc-big-en-moroccain_dialect
22
 
23
  This model was trained from scratch on the None dataset.
 
28
 
29
  ## Model description
30
 
31
+ MarianConfig {
32
+ "_name_or_path": "/content/drive/MyDrive/Colab Notebooks/big_helsinki_eng_dar",
33
+ "activation_dropout": 0.0,
34
+ "activation_function": "relu",
35
+ "architectures": [
36
+ "MarianMTModel"
37
+ ],
38
+ "attention_dropout": 0.0,
39
+ "bad_words_ids": [
40
+ [
41
+ 61246
42
+ ]
43
+ ],
44
+ "bos_token_id": 0,
45
+ "classifier_dropout": 0.0,
46
+ "d_model": 1024,
47
+ "decoder_attention_heads": 16,
48
+ "decoder_ffn_dim": 4096,
49
+ "decoder_layerdrop": 0.0,
50
+ "decoder_layers": 6,
51
+ "decoder_start_token_id": 61246,
52
+ "decoder_vocab_size": 61247,
53
+ "dropout": 0.1,
54
+ "encoder_attention_heads": 16,
55
+ "encoder_ffn_dim": 4096,
56
+ "encoder_layerdrop": 0.0,
57
+ "encoder_layers": 6,
58
+ "eos_token_id": 25897,
59
+ "forced_eos_token_id": 25897,
60
+ "init_std": 0.02,
61
+ "is_encoder_decoder": true,
62
+ "max_length": 512,
63
+ "max_position_embeddings": 1024,
64
+ "model_type": "marian",
65
+ "normalize_embedding": false,
66
+ "num_beams": 4,
67
+ "num_hidden_layers": 6,
68
+ "pad_token_id": 61246,
69
+ "scale_embedding": true,
70
+ "share_encoder_decoder_embeddings": true,
71
+ "static_position_embeddings": true,
72
+ "torch_dtype": "float32",
73
+ "transformers_version": "4.28.0",
74
+ "use_cache": true,
75
+ "vocab_size": 61247
76
+ }
77
 
78
  ## Intended uses & limitations
79
 
 
81
 
82
  ## Training and evaluation data
83
 
84
+ DatasetDict({
85
+ train: Dataset({
86
+ features: ['input_ids', 'attention_mask', 'labels'],
87
+ num_rows: 15443
88
+ })
89
+ test: Dataset({
90
+ features: ['input_ids', 'attention_mask', 'labels'],
91
+ num_rows: 813
92
+ })
93
+ })
94
 
95
  ## Training procedure
96
 
97
+ Transfer learning was used due to the limited amount of data available for the Moroccan dialect.
98
+
99
  ### Training hyperparameters
100
 
101
  The following hyperparameters were used during training: