daelba committed on
Commit
404305e
1 Parent(s): c3b3d10
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./model_wd_bscz_8",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 1024,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "gated-gelu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": true,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "mt5",
19
+ "num_decoder_layers": 8,
20
+ "num_heads": 6,
21
+ "num_layers": 8,
22
+ "pad_token_id": 0,
23
+ "relative_attention_max_distance": 128,
24
+ "relative_attention_num_buckets": 32,
25
+ "tie_word_embeddings": false,
26
+ "tokenizer_class": "T5Tokenizer",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.30.0",
29
+ "use_cache": true,
30
+ "vocab_size": 250112
31
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.30.0"
7
+ }
models.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "models": [
3
+ {
4
+ "name": "mT5-small",
5
+ "desc": "starting model: mT5-small",
6
+ "source": "google/mt5-small",
7
+ "loss before": 0.0,
8
+ "loss after": 0.0
9
+ },
10
+ {
11
+ "name": "dates",
12
+ "desc": "enrich: conversion from m. d. YYYY to ISO",
13
+ "items": 10000,
14
+ "source": "generated by script",
15
+ "loss before": 24.0673828125,
16
+ "loss after": 0.0028942381031811237,
17
+ "date": "2024-03-12",
18
+ "max length": 59
19
+ },
20
+ {
21
+ "name": "wd_bscz_1",
22
+ "desc": "enrich: data from WD, birthdate: full, deathdate: full",
23
+ "items": 10000,
24
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CREPLACE(STR(%3Fnarkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000",
25
+ "loss before": 17.901836395263672,
26
+ "loss after": 0.21804147958755493,
27
+ "date": "2024-03-12",
28
+ "max length": 401
29
+ },
30
+ {
31
+ "name": "wd_bscz_2",
32
+ "desc": "enrich: data from WD, birthdate: full, deathdate: year",
33
+ "items": 10000,
34
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CREPLACE(STR(%3Fnarkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22-.*%22%2C%22%22)%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2010000",
35
+ "loss before": 0.5377106070518494,
36
+ "loss after": 0.1428946554660797,
37
+ "date": "2024-03-12",
38
+ "max length": 358
39
+ },
40
+ {
41
+ "name": "wd_bscz_3",
42
+ "desc": "enrich: data from WD, birthdate: year, deathdate: full",
43
+ "items": 10000,
44
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2020000",
45
+ "loss before": 0.9282719492912292,
46
+ "loss after": 0.12308310717344284,
47
+ "date": "2024-03-14",
48
+ "max length": 352
49
+ },
50
+ {
51
+ "name": "wd_bscz_4",
52
+ "desc": "enrich: data from WD, birthdate: year, deathdate: year",
53
+ "items": 10000,
54
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2030000",
55
+ "loss before": 0.6698053479194641,
56
+ "loss after": 0.12252699583768845,
57
+ "date": "2024-03-14",
58
+ "max length": 327
59
+ },
60
+ {
61
+ "name": "wd_bscz_5",
62
+ "desc": "enrich: data from WD, birthdate: year, deathdate: year, profession: none",
63
+ "items": 500,
64
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
65
+ "loss before": 3.490724563598633,
66
+ "loss after": 0.0016380766173824668,
67
+ "date": "2024-03-14",
68
+ "max length": 166
69
+ },
70
+ {
71
+ "name": "wd_bscz_6",
72
+ "desc": "enrich: data from WD, birthdate: year, deathdate: none, profession: none",
73
+ "items": 500,
74
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%2C%20%E2%80%A0%20%3F.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%E2%80%A0%20%3F%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
75
+ "loss before": 3.8766534328460693,
76
+ "loss after": 4.379506663099164e-06,
77
+ "date": "2024-03-14",
78
+ "max length": 156
79
+ },
80
+ {
81
+ "name": "wd_bscz_7",
82
+ "desc": "enrich: data from WD, birthdate: none, deathdate: year, profession: none",
83
+ "items": 500,
84
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20%3F%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C%3F%7C%3F%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
85
+ "loss before": 4.1504807472229,
86
+ "loss after": 1.5070370864123106e-05,
87
+ "date": "2024-03-14",
88
+ "max length": 168
89
+ },
90
+ {
91
+ "name": "wd_bscz_8",
92
+ "desc": "enrich: data from WD, birthdate: none, deathdate: none, profession: none",
93
+ "items": 291,
94
+ "source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20%3F%2C%20%E2%80%A0%20%3F.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C%3F%7C%3F%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C%3F%7C%3F%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%20500",
95
+ "loss before": 4.866212844848633,
96
+ "loss after": 0.0021445967722684145,
97
+ "date": "2024-03-14",
98
+ "max length": 150
99
+ },
100
+ {
101
+ "name": "blgbl-I-1",
102
+ "desc": "Biographisches Lexikon zur Geschichte der böhmischen Länder I/1",
103
+ "items": 500,
104
+ "source": "",
105
+ "loss before": 3.910360336303711,
106
+ "loss after": 1.032591462135315,
107
+ "date": "2024-03-14",
108
+ "max length": 931
109
+ }
110
+ ]
111
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5b1a7e9fcebf9b3eeb2498705da866cda911a7073348a0e2703305dedee170
3
+ size 1200772485
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dccb6a54d5cd6df345c59acea7ed9ad82c0c7ee57d0271e2725bbf5619044fe6
3
+ size 16315220
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "clean_up_tokenization_spaces": true,
4
+ "eos_token": "</s>",
5
+ "extra_ids": 0,
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "<pad>",
8
+ "sp_model_kwargs": {},
9
+ "tokenizer_class": "T5Tokenizer",
10
+ "unk_token": "<unk>"
11
+ }