+ model
Browse files- .gitattributes +1 -0
- config.json +31 -0
- generation_config.json +7 -0
- models.json +111 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +5 -0
- spiece.model +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./model_wd_bscz_8",
|
3 |
+
"architectures": [
|
4 |
+
"MT5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"d_ff": 1024,
|
7 |
+
"d_kv": 64,
|
8 |
+
"d_model": 512,
|
9 |
+
"decoder_start_token_id": 0,
|
10 |
+
"dense_act_fn": "gelu_new",
|
11 |
+
"dropout_rate": 0.1,
|
12 |
+
"eos_token_id": 1,
|
13 |
+
"feed_forward_proj": "gated-gelu",
|
14 |
+
"initializer_factor": 1.0,
|
15 |
+
"is_encoder_decoder": true,
|
16 |
+
"is_gated_act": true,
|
17 |
+
"layer_norm_epsilon": 1e-06,
|
18 |
+
"model_type": "mt5",
|
19 |
+
"num_decoder_layers": 8,
|
20 |
+
"num_heads": 6,
|
21 |
+
"num_layers": 8,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"relative_attention_max_distance": 128,
|
24 |
+
"relative_attention_num_buckets": 32,
|
25 |
+
"tie_word_embeddings": false,
|
26 |
+
"tokenizer_class": "T5Tokenizer",
|
27 |
+
"torch_dtype": "float32",
|
28 |
+
"transformers_version": "4.30.0",
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 250112
|
31 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"decoder_start_token_id": 0,
|
4 |
+
"eos_token_id": 1,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.30.0"
|
7 |
+
}
|
models.json
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"models": [
|
3 |
+
{
|
4 |
+
"name": "mT5-small",
|
5 |
+
"desc": "starting model: mT5-small",
|
6 |
+
"source": "google/mt5-small",
|
7 |
+
"loss before": 0.0,
|
8 |
+
"loss after": 0.0
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"name": "dates",
|
12 |
+
"desc": "enrich: conversion from m. d. YYYY to ISO",
|
13 |
+
"items": 10000,
|
14 |
+
"source": "generated by script",
|
15 |
+
"loss before": 24.0673828125,
|
16 |
+
"loss after": 0.0028942381031811237,
|
17 |
+
"date": "2024-03-12",
|
18 |
+
"max length": 59
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"name": "wd_bscz_1",
|
22 |
+
"desc": "enrich: data from WD, birthdate: full, deathdate: full",
|
23 |
+
"items": 10000,
|
24 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CREPLACE(STR(%3Fnarkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000",
|
25 |
+
"loss before": 17.901836395263672,
|
26 |
+
"loss after": 0.21804147958755493,
|
27 |
+
"date": "2024-03-12",
|
28 |
+
"max length": 401
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"name": "wd_bscz_2",
|
32 |
+
"desc": "enrich: data from WD, birthdate: full, deathdate: year",
|
33 |
+
"items": 10000,
|
34 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CREPLACE(STR(%3Fnarkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fnarkdy))%2C'.%20'%2CSTR(MONTH(%3Fnarkdy))%2C'.%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22-.*%22%2C%22%22)%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2010000",
|
35 |
+
"loss before": 0.5377106070518494,
|
36 |
+
"loss after": 0.1428946554660797,
|
37 |
+
"date": "2024-03-12",
|
38 |
+
"max length": 358
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"name": "wd_bscz_3",
|
42 |
+
"desc": "enrich: data from WD, birthdate: year, deathdate: full",
|
43 |
+
"items": 10000,
|
44 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CREPLACE(STR(%3Fzemkdy)%2C%22T.*%22%2C%22%22)%2C'%7C'%2CSTR(DAY(%3Fzemkdy))%2C'.%20'%2CSTR(MONTH(%3Fzemkdy))%2C'.%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2020000",
|
45 |
+
"loss before": 0.9282719492912292,
|
46 |
+
"loss after": 0.12308310717344284,
|
47 |
+
"date": "2024-03-14",
|
48 |
+
"max length": 352
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"name": "wd_bscz_4",
|
52 |
+
"desc": "enrich: data from WD, birthdate: year, deathdate: year",
|
53 |
+
"items": 10000,
|
54 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%3Fnarkde%20rdfs%3Alabel%20%3FnarkdeLabel.%20FILTER(LANG(%3FnarkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%3Fzemkde%20rdfs%3Alabel%20%3FzemkdeLabel.%20FILTER(LANG(%3FzemkdeLabel)%3D%22de%22)%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20'%2CSTR(%3FprofLabel)%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%20'%2CSTR(%3FnarkdeLabel)%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'%20'%2CSTR(%3FzemkdeLabel)%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20%7B%7BWD%7CP106%7C'%2CREPLACE(STR(%3Fprof)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FprofLabel)%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%7B%7BWD%7CP19%7C'%2CREPLACE(STR(%3Fnarkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FnarkdeLabel)%2C'%7D%7D%2C%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D%20%7B%7BWD%7CP20%7C'%2CREPLACE(STR(%3Fzemkde)%2C%22.*%2FQ%22%2C%22Q%22)%2C'%7C'%2CSTR(%3FzemkdeLabel)%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%2010000%0AOFFSET%2030000",
|
55 |
+
"loss before": 0.6698053479194641,
|
56 |
+
"loss after": 0.12252699583768845,
|
57 |
+
"date": "2024-03-14",
|
58 |
+
"max length": 327
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"name": "wd_bscz_5",
|
62 |
+
"desc": "enrich: data from WD, birthdate: year, deathdate: year, profession: none",
|
63 |
+
"items": 500,
|
64 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
|
65 |
+
"loss before": 3.490724563598633,
|
66 |
+
"loss after": 0.0016380766173824668,
|
67 |
+
"date": "2024-03-14",
|
68 |
+
"max length": 166
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"name": "wd_bscz_6",
|
72 |
+
"desc": "enrich: data from WD, birthdate: year, deathdate: none, profession: none",
|
73 |
+
"items": 500,
|
74 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20'%2CSTR(YEAR(%3Fnarkdy))%2C'%2C%20%E2%80%A0%20%3F.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C'%2CSTR(YEAR(%3Fnarkdy))%2C'%7D%7D%20%E2%80%A0%20%3F%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
|
75 |
+
"loss before": 3.8766534328460693,
|
76 |
+
"loss after": 4.379506663099164e-06,
|
77 |
+
"date": "2024-03-14",
|
78 |
+
"max length": 156
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"name": "wd_bscz_7",
|
82 |
+
"desc": "enrich: data from WD, birthdate: none, deathdate: year, profession: none",
|
83 |
+
"items": 500,
|
84 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%23%20OPTIONAL%20%7B%20%3Fitem%20wdt%3AP106%20%3Fprof.%20%3Fprof%20rdfs%3Alabel%20%3FprofLabel.%20FILTER(LANG(%3FprofLabel)%3D%22de%22)%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20%3F%2C%20%E2%80%A0%20'%2CSTR(YEAR(%3Fzemkdy))%2C'.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C%3F%7C%3F%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C'%2CSTR(YEAR(%3Fzemkdy))%2C'%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%2040000",
|
85 |
+
"loss before": 4.1504807472229,
|
86 |
+
"loss after": 1.5070370864123106e-05,
|
87 |
+
"date": "2024-03-14",
|
88 |
+
"max length": 168
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"name": "wd_bscz_8",
|
92 |
+
"desc": "enrich: data from WD, birthdate: none, deathdate: none, profession: none",
|
93 |
+
"items": 291,
|
94 |
+
"source": "https://query.wikidata.org/#SELECT%20%3Fresult%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP9160%20%3Fid%3B%0A%20%20%20%20%20%20%20%20rdfs%3Alabel%20%3Flabel.%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP569%20%3Fnarkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP19%20%3Fnarkde.%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP570%20%3Fzemkdy%20%7D%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP20%20%3Fzemkde.%20%7D%0A%20%20FILTER(LANG(%3Flabel)%3D%22de%22)%0A%20%20BIND(REPLACE(STR(%3Flabel)%2C%22(%5B%5E%20%5D%2B)%20(%5B%5E%20%5D%2B)%22%2C%22%242%2C%20%241%22)%20AS%20%3FlabelEdit)%0A%20%20BIND(CONCAT('%22wd%3A'%2C%3FlabelEdit%2C'%2C%20*%20%3F%2C%20%E2%80%A0%20%3F.%22%2C%22%7B%7BWD%7Clabel%7C'%2C%3FlabelEdit%2C'%7D%7D%2C%20*%20%7B%7BWD%7CP569%7C%3F%7C%3F%7D%7D%20%E2%80%A0%20%7B%7BWD%7CP570%7C%3F%7C%3F%7D%7D.%22')%20AS%20%3Fresult)%0A%20%20FILTER(%3Fresult!%3D%22%22)%0A%7D%0ALIMIT%20500%0A%23OFFSET%20500",
|
95 |
+
"loss before": 4.866212844848633,
|
96 |
+
"loss after": 0.0021445967722684145,
|
97 |
+
"date": "2024-03-14",
|
98 |
+
"max length": 150
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"name": "blgbl-I-1",
|
102 |
+
"desc": "Biographisches Lexikon zur Geschichte der Böhmischen Länder I/1",
|
103 |
+
"items": 500,
|
104 |
+
"source": "",
|
105 |
+
"loss before": 3.910360336303711,
|
106 |
+
"loss after": 1.032591462135315,
|
107 |
+
"date": "2024-03-14",
|
108 |
+
"max length": 931
|
109 |
+
}
|
110 |
+
]
|
111 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c5b1a7e9fcebf9b3eeb2498705da866cda911a7073348a0e2703305dedee170
|
3 |
+
size 1200772485
|
special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eos_token": "</s>",
|
3 |
+
"pad_token": "<pad>",
|
4 |
+
"unk_token": "<unk>"
|
5 |
+
}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
|
3 |
+
size 4309802
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dccb6a54d5cd6df345c59acea7ed9ad82c0c7ee57d0271e2725bbf5619044fe6
|
3 |
+
size 16315220
|
tokenizer_config.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": null,
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"extra_ids": 0,
|
6 |
+
"model_max_length": 1000000000000000019884624838656,
|
7 |
+
"pad_token": "<pad>",
|
8 |
+
"sp_model_kwargs": {},
|
9 |
+
"tokenizer_class": "T5Tokenizer",
|
10 |
+
"unk_token": "<unk>"
|
11 |
+
}
|