yhavinga committed
Commit fa73be6
1 Parent(s): 7fb92b3

Add generated texts

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. INTRO.md +2 -1
  2. app.py +54 -13
  3. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/config.json +61 -0
  4. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt +0 -0
  5. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt +0 -0
  6. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt +0 -0
  7. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt +0 -0
  8. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675612503.yeb-z390-k80.19856.0.v2 +3 -0
  9. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/generated.txt +0 -0
  10. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/special_tokens_map.json +107 -0
  11. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/test_results.json +7 -0
  12. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/tokenizer.json +0 -0
  13. data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/tokenizer_config.json +112 -0
  14. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/config.json +30 -0
  15. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt +0 -0
  16. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt +0 -0
  17. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt +0 -0
  18. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt +0 -0
  19. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675589234.yeb-z390-k80.16384.0.v2 +3 -0
  20. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/generated.txt +0 -0
  21. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/special_tokens_map.json +107 -0
  22. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/test_results.json +7 -0
  23. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/tokenizer.json +0 -0
  24. data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/tokenizer_config.json +112 -0
  25. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/config.json +30 -0
  26. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt +0 -0
  27. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt +0 -0
  28. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt +0 -0
  29. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt +0 -0
  30. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675603189.yeb-z390-k80.18766.0.v2 +3 -0
  31. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/generated.txt +0 -0
  32. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/special_tokens_map.json +107 -0
  33. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/test_results.json +7 -0
  34. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/tokenizer.json +0 -0
  35. data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/tokenizer_config.json +112 -0
  36. data/eval_summ_results/11-t5-eff-xl-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675603057.yeb-z390-k80.18371.0.v2 +3 -0
  37. data/eval_summ_results/11-t5-eff-xl-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675652025.yeb-z390-k80.25495.0.v2 +3 -0
  38. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/config.json +30 -0
  39. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt +0 -0
  40. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt +0 -0
  41. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt +0 -0
  42. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt +0 -0
  43. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675607944.yeb-z390-k80.19341.0.v2 +3 -0
  44. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/generated.txt +0 -0
  45. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/special_tokens_map.json +107 -0
  46. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/test_results.json +7 -0
  47. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/tokenizer.json +0 -0
  48. data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/tokenizer_config.json +112 -0
  49. data/eval_summ_results/13-mt5-base/yhavinga_cnn_dailymail_dutch/config.json +31 -0
  50. data/eval_summ_results/13-mt5-base/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt +0 -0
INTRO.md CHANGED
@@ -1,6 +1,7 @@
 # Dutch T5 models : UL2, T5, ByT5 and Long-T5 🇳🇱🇧🇪

-TL;DR: ul2-small-dutch(-english) and larger models are fit for Dutch text-to-text tasks.
+TL;DR: Dutch T5 and UL2 Models Trained with Google's TPU Research Cloud and mC4 Dataset Show Outstanding Performance in NLP Tasks.
+See below for model lists and comparison.

 During the [HuggingFace Flax/Jax community week](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104) in the summer of 2021,
 I was granted access to Google's TPU Research Cloud (TRC),
app.py CHANGED
@@ -1,6 +1,4 @@
-from functools import partial
-import time
-
+from glob import glob
 import sqlite3
 import psutil
 import streamlit as st
@@ -13,7 +11,7 @@ IMAGE_WIDTHS = 900
 PRE_TRAINED_DB = "data/pretrained.sqlite"


-@st.cache
+@st.cache_data
 def load_eval_data():
     conn = sqlite3.connect(PRE_TRAINED_DB)
     conn.row_factory = lambda c, r: {
@@ -37,7 +35,21 @@ def load_eval_data():
         columns={"summ_rouge1": "summ Rouge1", "trans_en_nl_score": "en->nl Bleu"},
         inplace=True,
     )
-    return plot_df
+    # for each model, read the summary text
+    for i, row in df.iterrows():
+        dirs = glob(f"data/eval_summ_results/{row['id']}-{row['name']}/yhavinga_cnn_dailymail_dutch/eval_predictions*")
+        try:
+            file = dirs[-1] + "/generated.txt"
+            with open(file, "r") as f:
+                text = str(row["id"]) + " " + f.read().replace("<n>", " ")
+        except Exception:
+            text = "fine-tune failed, no data"
+        df.at[i, "summary"] = text
+
+    # order df by the name column desc
+    df.sort_values(by="name", inplace=True, ascending=False)
+
+    return plot_df, df


 def main():
@@ -47,7 +59,7 @@ def main():
         initial_sidebar_state="collapsed",  # Can be "auto", "expanded", "collapsed"
         page_icon="📑",  # String, anything supported by st.image, or None.
     )
-    plot_df = load_eval_data()
+    plot_df, df = load_eval_data()

     with open("style.css") as f:
         st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
@@ -76,14 +88,19 @@ Fine-tuning for evaluation was done on a limited set of 50K examples from the fi
 | source length | 1024 | 128 |
 | target length | 142 | 128 |
 | #eval samples | 1000 | 1000 |
-| wandb link | [eval_summ](https://wandb.ai/yepster/eval_dutch_cnndaily_202302_flax)|[eval_transl](https://wandb.ai/yepster/eval_dutch_ccmatrix_202302_flax) |
+| WandB link | [eval_summ](https://wandb.ai/yepster/eval_dutch_cnndaily_202302_flax)|[eval_transl](https://wandb.ai/yepster/eval_dutch_ccmatrix_202302_flax) |
+
+On the WandB links above you can also find generated texts for each model to compare.

 ### Evaluation results

 The figure below shows the evaluation scores for most models, with summarization Rouge1 on the x-axis (higher is better),
 and translation English to Dutch Bleu score on the y-axis (higher is better).
-The point size is proportional to the model size. UL2 models are blue, Flan models
-red, mT5 green and the other models black.
+The point size is proportional to the model size.
+UL2 models are blue,
+t5_1_1 models orange,
+Flan models red,
+mT5 green and the other models black.
 """
     )
     col1, col2 = st.columns(2)
@@ -100,8 +117,8 @@ red, mT5 green and the other models black.
     large_enabled = st.checkbox("large model sizes")
     _24_enabled = st.checkbox("small nl24 deep narrow sizes")
    _36_enabled = st.checkbox("base nl36 deep narrow sizes")
-    _8l_enabled = st.checkbox("large nl8 deep wide sizes")
-    _4xl_enabled = st.checkbox("xlarge nl4 deep wide sizes")
+    _8l_enabled = st.checkbox("large nl8 shallow sizes")
+    _4xl_enabled = st.checkbox("xlarge nl4 shallow wide sizes")

     plot_df = plot_df[
         (plot_df["name"].str.contains("ul2") & ul2_enabled)
@@ -166,6 +183,30 @@ red, mT5 green and the other models black.
     tokens, the sliding attention window with radius length 127 of the `long-t5` models should be able to handle this.
     """)

+    st.markdown("### Compare generated summaries")
+    col1, col2 = st.columns(2)
+    with col1:
+        model_left = st.selectbox("Choose left model", df["name"], index=6)
+    with col2:
+        model_right = st.selectbox("Choose right model", df["name"], index=33)
+
+    @st.cache_resource
+    def get_row(model):
+        return df[df["name"] == model]
+
+    row_left = get_row(model_left)
+    row_right = get_row(model_right)
+
+    contents1 = row_left["summary"].values[0].split("\n")
+    contents2 = row_right["summary"].values[0].split("\n")
+    contents = list(zip(contents1, contents2))[:5]
+    st.table(
+        pd.DataFrame(
+            contents,
+            columns=[model_left, model_right],
+        )
+    )
+
     with open("REMARKS.md", "r") as f:
         st.markdown(f.read())

@@ -248,12 +289,12 @@ models to converge during fine-tuning.
     )

     st.markdown(
-        """### Sequence length 512 or 1024
+        """### Pre-training with sequence length 512 or 1024

 The models `t5-v1_1-base-dutch-english-cased` and `t5-v1_1-base-dutch-english-cased-1024` have the same model dimensions,
 but are pre-trained on different sequence lenghts, 512 and 1024 respectively.
 The evaluation loss and accuracy of the models do not look too different. Since training of the 1024 sequence length model was
-very slow and didn't converge a was was very slow, I stopped it early. The figure below shows the evaluation
+very slow and didn't converge, I stopped it early. The figure below shows the evaluation
 loss and accuracy.
 """
     )
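A side note on the caching change above: newer Streamlit releases deprecate `st.cache` in favour of `st.cache_data` (for serializable results) and `st.cache_resource` (for shared resources such as models or connections). A minimal sketch of the pattern, with a hypothetical `load_table` helper and table name standing in for the app's own `load_eval_data`:

```python
import sqlite3

import pandas as pd
import streamlit as st


@st.cache_data  # memoizes the returned DataFrame; re-runs only when the arguments change
def load_table(db_path: str) -> pd.DataFrame:
    # "results" is a placeholder table name for this sketch
    with sqlite3.connect(db_path) as conn:
        return pd.read_sql_query("SELECT * FROM results", conn)


df = load_table("data/pretrained.sqlite")
st.dataframe(df)
```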
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/config.json ADDED
@@ -0,0 +1,61 @@
+ {
+ "_name_or_path": "yhavinga/t5-base-dutch",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 3072,
+ "d_kv": 64,
+ "d_model": 768,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "relu",
+ "dropout_rate": 0.0,
+ "eos_token_id": 1,
+ "feed_forward_proj": "relu",
+ "gradient_checkpointing": false,
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": false,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 12,
+ "num_heads": 12,
+ "num_layers": 12,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "task_specific_params": {
+ "summarization": {
+ "early_stopping": true,
+ "length_penalty": 2.0,
+ "max_length": 200,
+ "min_length": 30,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "prefix": "summarize: "
+ },
+ "translation_en_to_de": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to German: "
+ },
+ "translation_en_to_fr": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to French: "
+ },
+ "translation_en_to_ro": {
+ "early_stopping": true,
+ "max_length": 300,
+ "num_beams": 4,
+ "prefix": "translate English to Romanian: "
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.23.1",
+ "use_cache": true,
+ "vocab_size": 32103
+ }
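The `task_specific_params` block in this config records generation defaults such as the `summarize: ` prefix and 4-beam search. A hedged sketch of running such a checkpoint with the Hugging Face `transformers` API; the checkpoint path is a placeholder, since this directory only stores the config and tokenizer next to the eval outputs, not the fine-tuned weights:

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Placeholder path: the fine-tuned weights themselves are not part of this directory.
checkpoint = "path/to/finetuned-t5-base-dutch-summarization"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

article = "..."  # a Dutch news article (placeholder)
inputs = tokenizer("summarize: " + article, return_tensors="pt", truncation=True, max_length=1024)
summary_ids = model.generate(**inputs, num_beams=4, max_length=142, no_repeat_ngram_size=3, early_stopping=True)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```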
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675612503.yeb-z390-k80.19856.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed01f1c77e3af6010a984f90a7fc5cd4caced6134f30f636371a7584f1d0e29f
+ size 9290
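The `events.out.tfevents.*` files are stored as Git LFS pointers, so the three lines above are the pointer, not the log itself. Once the real file has been pulled, the logged scalars can be read back with TensorBoard's event reader; a small sketch, assuming the events file has been fetched into this directory:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

log_dir = "data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch"
ea = EventAccumulator(log_dir)
ea.Reload()  # parse the events file(s) found in log_dir
for tag in ea.Tags()["scalars"]:
    for event in ea.Scalars(tag):
        print(tag, event.step, event.value)
```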
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
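The 100 `<extra_id_*>` entries above are T5's sentinel tokens, used as mask placeholders during span-corruption pre-training and carried along in every tokenizer copy. A small sketch of inspecting them with `transformers`, loading from the Hub id recorded in the config (an assumption that it matches these local files):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("yhavinga/t5-base-dutch")

print(len(tokenizer.additional_special_tokens))          # 100 sentinel tokens
print(tokenizer.convert_tokens_to_ids("<extra_id_0>"))   # sentinels sit at the top of the vocabulary

# In span-corruption targets the sentinels mark the masked spans, e.g.:
ids = tokenizer("Het <extra_id_0> liep door de <extra_id_1>.").input_ids
```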
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/test_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "test_gen_len": 66.217,
+ "test_rouge1": 30.6695,
+ "test_rouge2": 11.1431,
+ "test_rougeL": 22.1128,
+ "test_rougeLsum": 27.4923
+ }
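The ROUGE values in these result files are reported on a 0–100 scale. A toy sketch of how such scores are typically computed with the `evaluate` library (recent versions return plain floats in [0, 1]; the two sentences are made-up stand-ins for the 1000 generated/reference summary pairs):

```python
import evaluate  # pip install evaluate rouge_score

rouge = evaluate.load("rouge")
predictions = ["de kat zat op de mat"]   # toy generated summary
references = ["de kat lag op de mat"]    # toy reference summary
scores = rouge.compute(predictions=predictions, references=references)
print({k: round(v * 100, 4) for k, v in scores.items()})  # rouge1, rouge2, rougeL, rougeLsum
```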
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/0-t5-base-dutch/yhavinga_cnn_dailymail_dutch/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "name_or_path": "yhavinga/t5-base-dutch",
107
+ "pad_token": "<pad>",
108
+ "special_tokens_map_file": null,
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>",
111
+ "use_fast_tokenizer": true
112
+ }
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": "yhavinga/t5-v1.1-base-dutch-uncased",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 768,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "gelu_new",
+ "dropout_rate": 0.0,
+ "eos_token_id": 1,
+ "feed_forward_proj": "gated-gelu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": true,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "num_decoder_layers": 12,
+ "num_heads": 12,
+ "num_layers": 12,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "tie_word_embeddings": false,
+ "transformers_version": "4.23.1",
+ "use_cache": true,
+ "vocab_size": 32103
+ }
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675589234.yeb-z390-k80.16384.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd1c21c0da2969ef7743d7cf3dd61b26122eff70be4d6bf68efd0bf7f50126e0
+ size 9290
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/test_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "test_gen_len": 63.392,
+ "test_rouge1": 28.9991,
+ "test_rouge2": 10.629,
+ "test_rougeL": 21.559,
+ "test_rougeLsum": 25.8178
+ }
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/1-t5-v1.1-base-dutch-uncased/yhavinga_cnn_dailymail_dutch/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "name_or_path": "yhavinga/t5-v1.1-base-dutch-uncased",
107
+ "pad_token": "<pad>",
108
+ "special_tokens_map_file": null,
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>",
111
+ "use_fast_tokenizer": true
112
+ }
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": "yhavinga/t5-eff-large-8l-nedd",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 4096,
+ "d_kv": 64,
+ "d_model": 1024,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "gelu_new",
+ "dropout_rate": 0.0,
+ "eos_token_id": 1,
+ "feed_forward_proj": "gated-gelu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": true,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 8,
+ "num_heads": 16,
+ "num_layers": 8,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "torch_dtype": "float32",
+ "transformers_version": "4.23.1",
+ "use_cache": true,
+ "vocab_size": 32103
+ }
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675603189.yeb-z390-k80.18766.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffb823199e6ddc70a02f74c01683ca414912e77f6ed5157b94e3a3f459a61c1e
+ size 9290
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/test_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "test_gen_len": 70.0,
+ "test_rouge1": 24.8564,
+ "test_rouge2": 8.1936,
+ "test_rougeL": 19.3063,
+ "test_rougeLsum": 19.3954
+ }
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/10-t5-eff-large-8l-nedd/yhavinga_cnn_dailymail_dutch/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "name_or_path": "yhavinga/t5-eff-large-8l-nedd",
107
+ "pad_token": "<pad>",
108
+ "special_tokens_map_file": null,
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>",
111
+ "use_fast_tokenizer": true
112
+ }
data/eval_summ_results/11-t5-eff-xl-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675603057.yeb-z390-k80.18371.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:96826e56bb14c5cf0f5b4b93111c8e006ba0ef4305cc24c95f77734e6c46dc8e
+ size 40
data/eval_summ_results/11-t5-eff-xl-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675652025.yeb-z390-k80.25495.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9793f549ee8cfd23fe5725ccdf7f7522f33772f0aa270b6efaf0abb77eec1569
+ size 40
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "_name_or_path": "yhavinga/t5-eff-large-8l-dutch-english-cased",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 4096,
+ "d_kv": 64,
+ "d_model": 1024,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "gelu_new",
+ "dropout_rate": 0.0,
+ "eos_token_id": 1,
+ "feed_forward_proj": "gated-gelu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": true,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "n_positions": 512,
+ "num_decoder_layers": 8,
+ "num_heads": 16,
+ "num_layers": 8,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "torch_dtype": "float32",
+ "transformers_version": "4.23.1",
+ "use_cache": true,
+ "vocab_size": 32103
+ }
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_labels_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_predictions_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/eval_predictions_00006249_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/events.out.tfevents.1675607944.yeb-z390-k80.19341.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d7872f6a483ce457f0edeca96c1a38dbb83c66e401f5668ac1166f4efcba8f2
+ size 9290
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/generated.txt ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/test_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "test_gen_len": 69.869,
+ "test_rouge1": 28.8587,
+ "test_rouge2": 10.1392,
+ "test_rougeL": 21.2288,
+ "test_rougeLsum": 25.8249
+ }
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
data/eval_summ_results/12-t5-eff-large-8l-dutch-english-cased/yhavinga_cnn_dailymail_dutch/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "name_or_path": "yhavinga/t5-eff-large-8l-dutch-english-cased",
107
+ "pad_token": "<pad>",
108
+ "special_tokens_map_file": null,
109
+ "tokenizer_class": "T5Tokenizer",
110
+ "unk_token": "<unk>",
111
+ "use_fast_tokenizer": true
112
+ }
data/eval_summ_results/13-mt5-base/yhavinga_cnn_dailymail_dutch/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "_name_or_path": "google/mt5-base",
+ "architectures": [
+ "MT5ForConditionalGeneration"
+ ],
+ "d_ff": 2048,
+ "d_kv": 64,
+ "d_model": 768,
+ "decoder_start_token_id": 0,
+ "dense_act_fn": "gelu_new",
+ "dropout_rate": 0.0,
+ "eos_token_id": 1,
+ "feed_forward_proj": "gated-gelu",
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "is_gated_act": true,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "mt5",
+ "num_decoder_layers": 12,
+ "num_heads": 12,
+ "num_layers": 12,
+ "output_past": true,
+ "pad_token_id": 0,
+ "relative_attention_max_distance": 128,
+ "relative_attention_num_buckets": 32,
+ "tie_word_embeddings": false,
+ "tokenizer_class": "T5Tokenizer",
+ "transformers_version": "4.23.1",
+ "use_cache": true,
+ "vocab_size": 250112
+ }
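For comparison with the Dutch-specific configs earlier in this commit: the main difference of `google/mt5-base` is its 250k multilingual vocabulary versus the 32,103-token Dutch tokenizers, which puts a large share of mT5's parameters in the embedding matrices. A quick sketch, assuming both Hub ids resolve as usual:

```python
from transformers import AutoConfig

dutch = AutoConfig.from_pretrained("yhavinga/t5-base-dutch")
mt5 = AutoConfig.from_pretrained("google/mt5-base")
print(dutch.vocab_size, mt5.vocab_size)  # 32103 vs 250112
```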
data/eval_summ_results/13-mt5-base/yhavinga_cnn_dailymail_dutch/eval_labels_00003136_0/generated.txt ADDED
The diff for this file is too large to render. See raw diff