Juho Inkinen committed on
Commit
452f4e1
1 Parent(s): d4a076f

Reset repo before projects upload with Annif

Browse files
Files changed (47) hide show
  1. .dvc/.gitignore +0 -3
  2. .dvc/config +0 -6
  3. .dvcignore +0 -3
  4. .gitattributes +0 -48
  5. .gitignore +0 -2
  6. LICENSE +0 -121
  7. README.md +0 -45
  8. corpora/.gitignore +0 -2
  9. corpora/finna/.gitignore +0 -1
  10. corpora/finna/kauno-finna-fin.tsv.gz.dvc +0 -10
  11. corpora/kauno-skos-reduced.ttl.dvc +0 -11
  12. corpora/kirjasampo/.gitignore +0 -3
  13. corpora/kirjasampo/kirjasampo-kauno-test.tsv.dvc +0 -11
  14. corpora/kirjasampo/kirjasampo-kauno-train.tsv.dvc +0 -11
  15. corpora/kirjasampo/kirjasampo-kauno-validate.tsv.dvc +0 -11
  16. data/projects/.gitignore +0 -8
  17. data/projects/kauno-fi/nn-model.keras +0 -3
  18. data/projects/kauno-fi/nn-train.mdb/data.mdb +0 -3
  19. data/projects/kauno-fi/nn-train.mdb/lock.mdb +0 -0
  20. data/projects/kauno-finna-bonsai-fi/omikuji-model/settings.json +0 -4
  21. data/projects/kauno-finna-bonsai-fi/omikuji-model/tree0.cbor +0 -3
  22. data/projects/kauno-finna-bonsai-fi/omikuji-model/tree1.cbor +0 -3
  23. data/projects/kauno-finna-bonsai-fi/omikuji-model/tree2.cbor +0 -3
  24. data/projects/kauno-finna-bonsai-fi/omikuji-train.txt +0 -3
  25. data/projects/kauno-finna-bonsai-fi/vectorizer +0 -3
  26. data/projects/kauno-ks-bonsai-fi/omikuji-model/settings.json +0 -4
  27. data/projects/kauno-ks-bonsai-fi/omikuji-model/tree0.cbor +0 -3
  28. data/projects/kauno-ks-bonsai-fi/omikuji-model/tree1.cbor +0 -3
  29. data/projects/kauno-ks-bonsai-fi/omikuji-model/tree2.cbor +0 -3
  30. data/projects/kauno-ks-bonsai-fi/omikuji-train.txt +0 -3
  31. data/projects/kauno-ks-bonsai-fi/vectorizer +0 -3
  32. data/projects/kauno-mllm-fi/mllm-model.gz +0 -3
  33. data/projects/kauno-mllm-fi/mllm-train.gz +0 -3
  34. data/vocabs/.gitignore +0 -1
  35. data/vocabs/kauno/subjects.csv +0 -0
  36. data/vocabs/kauno/subjects.dump.gz +0 -3
  37. data/vocabs/kauno/subjects.ttl +0 -3
  38. dvc.lock +0 -542
  39. dvc.yaml +0 -76
  40. projects.d/2-projects-kauno.toml +0 -44
  41. projects.toml +0 -44
  42. reports/kauno-fi.json +0 -5
  43. reports/kauno-finna-bonsai-fi.json +0 -5
  44. reports/kauno-ks-bonsai-fi.json +0 -5
  45. reports/kauno-mllm-fi.json +0 -5
  46. requirements.txt +0 -1
  47. sync-model-data-ocp.sh +0 -32
.dvc/.gitignore DELETED
@@ -1,3 +0,0 @@
1
- /config.local
2
- /tmp
3
- /cache
 
 
 
 
.dvc/config DELETED
@@ -1,6 +0,0 @@
1
- [cache]
2
- dir = /data/dvc-cache/FintoAI-data-KAUNO
3
- shared = group
4
- type = symlink
5
- [core]
6
- autostage = true
 
 
 
 
 
 
 
.dvcignore DELETED
@@ -1,3 +0,0 @@
1
- # Add patterns of files dvc should ignore, which could improve
2
- # the performance. Learn more at
3
- # https://dvc.org/doc/user-guide/dvcignore
 
 
 
 
.gitattributes DELETED
@@ -1,48 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- data/projects/kauno-fi/nn-model.keras filter=lfs diff=lfs merge=lfs -text
37
- data/projects/kauno-fi/nn-train.mdb/data.mdb filter=lfs diff=lfs merge=lfs -text
38
- data/projects/kauno-finna-bonsai-fi/omikuji-model/tree0.cbor filter=lfs diff=lfs merge=lfs -text
39
- data/projects/kauno-finna-bonsai-fi/omikuji-model/tree1.cbor filter=lfs diff=lfs merge=lfs -text
40
- data/projects/kauno-finna-bonsai-fi/omikuji-model/tree2.cbor filter=lfs diff=lfs merge=lfs -text
41
- data/projects/kauno-finna-bonsai-fi/omikuji-train.txt filter=lfs diff=lfs merge=lfs -text
42
- data/projects/kauno-finna-bonsai-fi/vectorizer filter=lfs diff=lfs merge=lfs -text
43
- data/projects/kauno-ks-bonsai-fi/omikuji-model/tree0.cbor filter=lfs diff=lfs merge=lfs -text
44
- data/projects/kauno-ks-bonsai-fi/omikuji-model/tree1.cbor filter=lfs diff=lfs merge=lfs -text
45
- data/projects/kauno-ks-bonsai-fi/omikuji-model/tree2.cbor filter=lfs diff=lfs merge=lfs -text
46
- data/projects/kauno-ks-bonsai-fi/omikuji-train.txt filter=lfs diff=lfs merge=lfs -text
47
- data/projects/kauno-ks-bonsai-fi/vectorizer filter=lfs diff=lfs merge=lfs -text
48
- data/vocabs/kauno/subjects.ttl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore DELETED
@@ -1,2 +0,0 @@
1
- venv
2
- venv-installed
 
 
 
LICENSE DELETED
@@ -1,121 +0,0 @@
1
- Creative Commons Legal Code
2
-
3
- CC0 1.0 Universal
4
-
5
- CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6
- LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7
- ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8
- INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9
- REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10
- PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11
- THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12
- HEREUNDER.
13
-
14
- Statement of Purpose
15
-
16
- The laws of most jurisdictions throughout the world automatically confer
17
- exclusive Copyright and Related Rights (defined below) upon the creator
18
- and subsequent owner(s) (each and all, an "owner") of an original work of
19
- authorship and/or a database (each, a "Work").
20
-
21
- Certain owners wish to permanently relinquish those rights to a Work for
22
- the purpose of contributing to a commons of creative, cultural and
23
- scientific works ("Commons") that the public can reliably and without fear
24
- of later claims of infringement build upon, modify, incorporate in other
25
- works, reuse and redistribute as freely as possible in any form whatsoever
26
- and for any purposes, including without limitation commercial purposes.
27
- These owners may contribute to the Commons to promote the ideal of a free
28
- culture and the further production of creative, cultural and scientific
29
- works, or to gain reputation or greater distribution for their Work in
30
- part through the use and efforts of others.
31
-
32
- For these and/or other purposes and motivations, and without any
33
- expectation of additional consideration or compensation, the person
34
- associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35
- is an owner of Copyright and Related Rights in the Work, voluntarily
36
- elects to apply CC0 to the Work and publicly distribute the Work under its
37
- terms, with knowledge of his or her Copyright and Related Rights in the
38
- Work and the meaning and intended legal effect of CC0 on those rights.
39
-
40
- 1. Copyright and Related Rights. A Work made available under CC0 may be
41
- protected by copyright and related or neighboring rights ("Copyright and
42
- Related Rights"). Copyright and Related Rights include, but are not
43
- limited to, the following:
44
-
45
- i. the right to reproduce, adapt, distribute, perform, display,
46
- communicate, and translate a Work;
47
- ii. moral rights retained by the original author(s) and/or performer(s);
48
- iii. publicity and privacy rights pertaining to a person's image or
49
- likeness depicted in a Work;
50
- iv. rights protecting against unfair competition in regards to a Work,
51
- subject to the limitations in paragraph 4(a), below;
52
- v. rights protecting the extraction, dissemination, use and reuse of data
53
- in a Work;
54
- vi. database rights (such as those arising under Directive 96/9/EC of the
55
- European Parliament and of the Council of 11 March 1996 on the legal
56
- protection of databases, and under any national implementation
57
- thereof, including any amended or successor version of such
58
- directive); and
59
- vii. other similar, equivalent or corresponding rights throughout the
60
- world based on applicable law or treaty, and any national
61
- implementations thereof.
62
-
63
- 2. Waiver. To the greatest extent permitted by, but not in contravention
64
- of, applicable law, Affirmer hereby overtly, fully, permanently,
65
- irrevocably and unconditionally waives, abandons, and surrenders all of
66
- Affirmer's Copyright and Related Rights and associated claims and causes
67
- of action, whether now known or unknown (including existing as well as
68
- future claims and causes of action), in the Work (i) in all territories
69
- worldwide, (ii) for the maximum duration provided by applicable law or
70
- treaty (including future time extensions), (iii) in any current or future
71
- medium and for any number of copies, and (iv) for any purpose whatsoever,
72
- including without limitation commercial, advertising or promotional
73
- purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74
- member of the public at large and to the detriment of Affirmer's heirs and
75
- successors, fully intending that such Waiver shall not be subject to
76
- revocation, rescission, cancellation, termination, or any other legal or
77
- equitable action to disrupt the quiet enjoyment of the Work by the public
78
- as contemplated by Affirmer's express Statement of Purpose.
79
-
80
- 3. Public License Fallback. Should any part of the Waiver for any reason
81
- be judged legally invalid or ineffective under applicable law, then the
82
- Waiver shall be preserved to the maximum extent permitted taking into
83
- account Affirmer's express Statement of Purpose. In addition, to the
84
- extent the Waiver is so judged Affirmer hereby grants to each affected
85
- person a royalty-free, non transferable, non sublicensable, non exclusive,
86
- irrevocable and unconditional license to exercise Affirmer's Copyright and
87
- Related Rights in the Work (i) in all territories worldwide, (ii) for the
88
- maximum duration provided by applicable law or treaty (including future
89
- time extensions), (iii) in any current or future medium and for any number
90
- of copies, and (iv) for any purpose whatsoever, including without
91
- limitation commercial, advertising or promotional purposes (the
92
- "License"). The License shall be deemed effective as of the date CC0 was
93
- applied by Affirmer to the Work. Should any part of the License for any
94
- reason be judged legally invalid or ineffective under applicable law, such
95
- partial invalidity or ineffectiveness shall not invalidate the remainder
96
- of the License, and in such case Affirmer hereby affirms that he or she
97
- will not (i) exercise any of his or her remaining Copyright and Related
98
- Rights in the Work or (ii) assert any associated claims and causes of
99
- action with respect to the Work, in either case contrary to Affirmer's
100
- express Statement of Purpose.
101
-
102
- 4. Limitations and Disclaimers.
103
-
104
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
105
- surrendered, licensed or otherwise affected by this document.
106
- b. Affirmer offers the Work as-is and makes no representations or
107
- warranties of any kind concerning the Work, express, implied,
108
- statutory or otherwise, including without limitation warranties of
109
- title, merchantability, fitness for a particular purpose, non
110
- infringement, or the absence of latent or other defects, accuracy, or
111
- the present or absence of errors, whether or not discoverable, all to
112
- the greatest extent permissible under applicable law.
113
- c. Affirmer disclaims responsibility for clearing rights of other persons
114
- that may apply to the Work or any use thereof, including without
115
- limitation any person's Copyright and Related Rights in the Work.
116
- Further, Affirmer disclaims responsibility for obtaining any necessary
117
- consents, permissions or other rights required for any use of the
118
- Work.
119
- d. Affirmer understands and acknowledges that Creative Commons is not a
120
- party to this document and has no duty or obligation with respect to
121
- this CC0 or use of the Work.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,45 +0,0 @@
1
- ---
2
- license: cc0-1.0
3
- language:
4
- - fi
5
- pipeline_tag: text-classification
6
- thumbnail: https://raw.githubusercontent.com/NatLibFi/FintoAI/main/ai.finto.fi/static/img/finto-ai-social.png
7
- tags:
8
- - glam
9
- - lam
10
- - subject indexing
11
- - annif
12
- ---
13
- # FintoAI-data-KAUNO
14
- This repository is for the Annif projects with the
15
- [KAUNO vocabulary](https://finto.fi/kauno)
16
- used at the [Finto AI service](https://ai.finto.fi/).
17
- The current models were published there on 2023-12-04.
18
- The models have been trained on Python 3.8.10 with [Annif](https://annif.org) version 1.0.0.
19
- See [projects.toml](projects.toml) for the configurations of the models.
20
-
21
- This repository is mirrored from GitHub to the 🤗 Hugging Face Hub;
22
- the GitHub repository does not contain the model files, but only the configurations for the projects and the DVC pipeline, see below.
23
-
24
- The training corpora that are public can be found in the [Annif-corpora repository](https://github.com/NatLibFi/Annif-corpora/).
25
-
26
- ## Models
27
- The downloadable directories for projects and vocabularies are stored in the
28
- [`/data`](https://huggingface.co/juhoinkinen/FintoAI-data-KAUNO/tree/main/data)
29
- directory of this repository in the 🤗 Hugging Face Hub.
30
-
31
- ## DVC pipeline
32
- The projects are trained and evaluated using a [DVC (Data Version Control) pipeline](https://dvc.org/doc/start/data-management/data-pipelines) defined in [dvc.yaml](./dvc.yaml).
33
-
34
- The pipeline takes care of
35
-
36
- 1. installing Annif in a venv,
37
- 2. loading the vocabulary,
38
- 3. training the projects,
39
- 4. evaluating the projects.
40
-
41
- When the necessary vocabulary and training corpora are in place the pipeline can be run using the command
42
-
43
- dvc repro
44
-
45
- For more information about using DVC with Annif projects see the [DVC exercise of Annif tutorial](https://github.com/NatLibFi/Annif-tutorial/blob/master/exercises/OPT_dvc.md).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
corpora/.gitignore DELETED
@@ -1,2 +0,0 @@
1
- /kauno-skos.ttl
2
- /kauno-skos-reduced.ttl
 
 
 
corpora/finna/.gitignore DELETED
@@ -1 +0,0 @@
1
- /kauno-finna-fin.tsv.gz
 
 
corpora/finna/kauno-finna-fin.tsv.gz.dvc DELETED
@@ -1,10 +0,0 @@
1
- md5: dae92ef1d2bed2a3c21e7631b6cdcb5e
2
- deps:
3
- - md5: ae570f73cbc72bd952efebd67575cda4
4
- size: 63666506
5
- path: /data/Annif-corpora/training/kauno-finna-fin.tsv.gz
6
- hash: md5
7
- outs:
8
- - md5: ae570f73cbc72bd952efebd67575cda4
9
- size: 63666506
10
- path: kauno-finna-fin.tsv.gz
 
 
 
 
 
 
 
 
 
 
 
corpora/kauno-skos-reduced.ttl.dvc DELETED
@@ -1,11 +0,0 @@
1
- md5: 4d29a28b7156dd12c51a8626e9f882bf
2
- deps:
3
- - md5: 6ca997fbaed020b9bfa867c991b19739
4
- size: 17501880
5
- path: /data/Annif-corpora-restricted/kirjasampo/kauno-skos-reduced.ttl
6
- hash: md5
7
- outs:
8
- - md5: 6ca997fbaed020b9bfa867c991b19739
9
- size: 17501880
10
- path: kauno-skos-reduced.ttl
11
- hash: md5
 
 
 
 
 
 
 
 
 
 
 
 
corpora/kirjasampo/.gitignore DELETED
@@ -1,3 +0,0 @@
1
- /kirjasampo-kauno-train.tsv
2
- /kirjasampo-kauno-validate.tsv
3
- /kirjasampo-kauno-test.tsv
 
 
 
 
corpora/kirjasampo/kirjasampo-kauno-test.tsv.dvc DELETED
@@ -1,11 +0,0 @@
1
- md5: cca190c38bb6b6fbdeb2c4ca5c85abd9
2
- deps:
3
- - md5: 2a491f3d2b35df602a3730db99f7605c
4
- size: 1502390
5
- path: /data/Annif-corpora-restricted/kirjasampo/kirjasampo-kauno-test.tsv
6
- hash: md5
7
- outs:
8
- - md5: 2a491f3d2b35df602a3730db99f7605c
9
- size: 1502390
10
- path: kirjasampo-kauno-test.tsv
11
- hash: md5
 
 
 
 
 
 
 
 
 
 
 
 
corpora/kirjasampo/kirjasampo-kauno-train.tsv.dvc DELETED
@@ -1,11 +0,0 @@
1
- md5: a59acca5aa93619ecdebc3083d0377a4
2
- deps:
3
- - md5: 77c01592d2c7c34f04bbee75b6584ec8
4
- size: 58959362
5
- path: /data/Annif-corpora-restricted/kirjasampo/kirjasampo-kauno-train.tsv
6
- hash: md5
7
- outs:
8
- - md5: 77c01592d2c7c34f04bbee75b6584ec8
9
- size: 58959362
10
- path: kirjasampo-kauno-train.tsv
11
- hash: md5
 
 
 
 
 
 
 
 
 
 
 
 
corpora/kirjasampo/kirjasampo-kauno-validate.tsv.dvc DELETED
@@ -1,11 +0,0 @@
1
- md5: 54798c42f7f1117903899095aa02821e
2
- deps:
3
- - md5: 6dbd774820c16dbd440da71151e36079
4
- size: 2455221
5
- path: /data/Annif-corpora-restricted/kirjasampo/kirjasampo-kauno-validate.tsv
6
- hash: md5
7
- outs:
8
- - md5: 6dbd774820c16dbd440da71151e36079
9
- size: 2455221
10
- path: kirjasampo-kauno-validate.tsv
11
- hash: md5
 
 
 
 
 
 
 
 
 
 
 
 
data/projects/.gitignore DELETED
@@ -1,8 +0,0 @@
1
- /kauno-mllm-fi
2
- /kauno-parabel-fi
3
- /kauno-bonsai-fi
4
- /kauno-ks-parabel-fi
5
- /kauno-finna-bonsai-fi
6
- /kauno-finna-parabel-fi
7
- /kauno-ks-bonsai-fi
8
- /kauno-fi
 
 
 
 
 
 
 
 
 
data/projects/kauno-fi/nn-model.keras DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1cb95f687837af160fb5477fefb33ff37f7a501271897ddbbbf09af5c1c0be6
3
- size 149871163
 
 
 
 
data/projects/kauno-fi/nn-train.mdb/data.mdb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5784999158ec1b488511ae47f94dfa47df4059ba84bd14489d1884c7c918a80
3
- size 1073741824
 
 
 
 
data/projects/kauno-fi/nn-train.mdb/lock.mdb DELETED
Binary file (8.19 kB)
 
data/projects/kauno-finna-bonsai-fi/omikuji-model/settings.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "n_features": 189653,
3
- "classifier_loss_type": "Hinge"
4
- }
 
 
 
 
 
data/projects/kauno-finna-bonsai-fi/omikuji-model/tree0.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:54588c89afca8b5f0114401a10fea3b90a4ca14e8383011a3eeb47f12543e731
3
- size 89083689
 
 
 
 
data/projects/kauno-finna-bonsai-fi/omikuji-model/tree1.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d3ebd9d8a157da81c66acaa79939e89f51815456255b81697c71744d846487e
3
- size 91911094
 
 
 
 
data/projects/kauno-finna-bonsai-fi/omikuji-model/tree2.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6b629a7850b432870a75620aad06e4d018635fa738f869f0507430885f1a372
3
- size 92377936
 
 
 
 
data/projects/kauno-finna-bonsai-fi/omikuji-train.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ea993ef846ff13aa4333512d7957f82fa06a472539c049e0fe442c3d52d2bea
3
- size 260348534
 
 
 
 
data/projects/kauno-finna-bonsai-fi/vectorizer DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d857f06ce5dd57e64767c893b18a54c00972f79c8175ab2da3cc6a6ccc238e0f
3
- size 8282114
 
 
 
 
data/projects/kauno-ks-bonsai-fi/omikuji-model/settings.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "n_features": 559304,
3
- "classifier_loss_type": "Hinge"
4
- }
 
 
 
 
 
data/projects/kauno-ks-bonsai-fi/omikuji-model/tree0.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ed86aa637f8cc03de7882816ba98eae46588ef588709a18dda3d1c8c6f9ca9e
3
- size 176888182
 
 
 
 
data/projects/kauno-ks-bonsai-fi/omikuji-model/tree1.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcaf22d01c6bad86b1d0879e41daba55797da1b7d5ec880816933750a3eac6bb
3
- size 171920461
 
 
 
 
data/projects/kauno-ks-bonsai-fi/omikuji-model/tree2.cbor DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a46080a4f9de3db7681c0d6300a080f67d4fd341822557ae0ca9bb28f2b079a
3
- size 176032672
 
 
 
 
data/projects/kauno-ks-bonsai-fi/omikuji-train.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2ea4caa77f69e88d911b7e5517126e1e3798d94046bb2e0f838ee68a446729e
3
- size 158132742
 
 
 
 
data/projects/kauno-ks-bonsai-fi/vectorizer DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5072139c0ab30288c3cdb1fb64cc33f8253a34b6f5106244ed364102076cd821
3
- size 62393514
 
 
 
 
data/projects/kauno-mllm-fi/mllm-model.gz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2d27c28e284fead8b5adcbdd664d27950b9f8fd591bf8c29b654eb9d48159dc
3
- size 1680598
 
 
 
 
data/projects/kauno-mllm-fi/mllm-train.gz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a386eb288dd2612c4c0890d117214ef57c4f45680c9537f6c2401c943a0f36d9
3
- size 581135
 
 
 
 
data/vocabs/.gitignore DELETED
@@ -1 +0,0 @@
1
- /kauno
 
 
data/vocabs/kauno/subjects.csv DELETED
The diff for this file is too large to render. See raw diff
 
data/vocabs/kauno/subjects.dump.gz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:11f2cfcf3629e4b77f926d4ff137bdf8b72b2b2805826d8cddcab80c1e5da309
3
- size 14021827
 
 
 
 
data/vocabs/kauno/subjects.ttl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:929d04200265beb3fc6412d0c372a2b4f5a773a3e39d08e45f33dda6f9987e6b
3
- size 17501880
 
 
 
 
dvc.lock DELETED
@@ -1,542 +0,0 @@
1
- schema: '2.0'
2
- stages:
3
- install:
4
- cmd:
5
- - python3 -m venv venv
6
- - . venv/bin/activate && pip install -U pip wheel setuptools && pip install -r
7
- requirements.txt
8
- - cp requirements.txt venv-installed
9
- deps:
10
- - path: requirements.txt
11
- hash: md5
12
- md5: f85f21b68735b126c2241fbff83fd0ef
13
- size: 41
14
- outs:
15
- - path: venv-installed
16
- hash: md5
17
- md5: f85f21b68735b126c2241fbff83fd0ef
18
- size: 41
19
- loadvoc:
20
- cmd: venv/bin/annif load-vocab --force kauno corpora/kauno-skos-reduced.ttl
21
- deps:
22
- - path: corpora/kauno-skos-reduced.ttl
23
- hash: md5
24
- md5: 6ca997fbaed020b9bfa867c991b19739
25
- size: 17501880
26
- - path: venv-installed
27
- hash: md5
28
- md5: f85f21b68735b126c2241fbff83fd0ef
29
- size: 41
30
- outs:
31
- - path: data/vocabs/kauno
32
- hash: md5
33
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
34
- size: 34076325
35
- nfiles: 3
36
- train-mllm:
37
- cmd: venv/bin/annif train kauno-mllm-fi -j 8 -d 2000 corpora/kirjasampo/kirjasampo-kauno-train.tsv
38
- deps:
39
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
40
- hash: md5
41
- md5: 77c01592d2c7c34f04bbee75b6584ec8
42
- size: 58959362
43
- - path: data/vocabs/kauno
44
- hash: md5
45
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
46
- size: 34076325
47
- nfiles: 3
48
- - path: venv-installed
49
- hash: md5
50
- md5: f85f21b68735b126c2241fbff83fd0ef
51
- size: 41
52
- outs:
53
- - path: data/projects/kauno-mllm-fi
54
- hash: md5
55
- md5: 23c57a221af59d0d2541a7df9e37ad7c.dir
56
- size: 2261733
57
- nfiles: 2
58
- train-omikuji@parabel:
59
- cmd: venv/bin/annif train kauno-parabel-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
60
- deps:
61
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
62
- md5: 6200fab3e9179186787cd49dbcaa587a
63
- size: 39921230
64
- - path: data/vocabs/kauno
65
- md5: 43f49eabb66648de35d01d57d196e196.dir
66
- size: 33952771
67
- nfiles: 3
68
- - path: venv-installed
69
- md5: b1b83339d2df42e93f3df0e83b1e85ab
70
- size: 42
71
- outs:
72
- - path: data/projects/kauno-parabel-fi
73
- md5: ba358be8ebda5347eba4825fcebcf43a.dir
74
- size: 327572347
75
- nfiles: 6
76
- train-omikuji@bonsai:
77
- cmd: venv/bin/annif train kauno-bonsai-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
78
- deps:
79
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
80
- md5: 6200fab3e9179186787cd49dbcaa587a
81
- size: 39921230
82
- - path: data/vocabs/kauno
83
- md5: 43f49eabb66648de35d01d57d196e196.dir
84
- size: 33952771
85
- nfiles: 3
86
- - path: venv-installed
87
- md5: b1b83339d2df42e93f3df0e83b1e85ab
88
- size: 42
89
- outs:
90
- - path: data/projects/kauno-bonsai-fi
91
- md5: 93b109deb653d12ea6137f3f67742934.dir
92
- size: 428545588
93
- nfiles: 6
94
- eval@bonsai:
95
- cmd:
96
- - venv/bin/annif eval kauno-bonsai-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-bonsai-fi.json
97
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
98
- deps:
99
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
100
- md5: 48611b60c8006116b9c8686456dcfbf6
101
- size: 1535757
102
- - path: data/projects/kauno-bonsai-fi
103
- md5: 93b109deb653d12ea6137f3f67742934.dir
104
- size: 428545588
105
- nfiles: 6
106
- - path: venv-installed
107
- md5: b1b83339d2df42e93f3df0e83b1e85ab
108
- size: 42
109
- outs:
110
- - path: reports/kauno-bonsai-fi.json
111
- md5: 0cb706d29e2c34c41292f422c0d71dd6
112
- size: 93
113
- eval@mllm:
114
- cmd:
115
- - venv/bin/annif eval kauno-mllm-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-mllm-fi.json
116
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
117
- deps:
118
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
119
- hash: md5
120
- md5: 2a491f3d2b35df602a3730db99f7605c
121
- size: 1502390
122
- - path: data/projects/kauno-mllm-fi
123
- hash: md5
124
- md5: 23c57a221af59d0d2541a7df9e37ad7c.dir
125
- size: 2261733
126
- nfiles: 2
127
- - path: venv-installed
128
- hash: md5
129
- md5: f85f21b68735b126c2241fbff83fd0ef
130
- size: 41
131
- outs:
132
- - path: reports/kauno-mllm-fi.json
133
- hash: md5
134
- md5: 9b94207439de0dee3c8efb58eaf23bdb
135
- size: 95
136
- eval@parabel:
137
- cmd:
138
- - venv/bin/annif eval kauno-parabel-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-parabel-fi.json
139
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
140
- deps:
141
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
142
- md5: 48611b60c8006116b9c8686456dcfbf6
143
- size: 1535757
144
- - path: data/projects/kauno-parabel-fi
145
- md5: ba358be8ebda5347eba4825fcebcf43a.dir
146
- size: 327572347
147
- nfiles: 6
148
- - path: venv-installed
149
- md5: b1b83339d2df42e93f3df0e83b1e85ab
150
- size: 42
151
- outs:
152
- - path: reports/kauno-parabel-fi.json
153
- md5: 56697a462498d0fc1b0dbe0f39dce592
154
- size: 94
155
- eval@ensemble:
156
- cmd:
157
- - venv/bin/annif eval kauno-ensemble-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-ensemble-fi.json
158
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
159
- deps:
160
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
161
- hash: md5
162
- md5: 2a491f3d2b35df602a3730db99f7605c
163
- size: 1502390
164
- - path: data/projects/kauno-ensemble-fi
165
- hash: md5
166
- md5: 3c5b9e385b3d59c658f823a92dfd34f6.dir
167
- size: 1223621179
168
- nfiles: 3
169
- - path: venv-installed
170
- hash: md5
171
- md5: f85f21b68735b126c2241fbff83fd0ef
172
- size: 41
173
- outs:
174
- - path: reports/kauno-ensemble-fi.json
175
- hash: md5
176
- md5: bc2d8f32c841e8941bba9fcd88a92896
177
- size: 94
178
- train-omikuji-ks@parabel:
179
- cmd: venv/bin/annif train kauno-ks-parabel-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
180
- deps:
181
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
182
- md5: 6200fab3e9179186787cd49dbcaa587a
183
- size: 39921230
184
- - path: data/vocabs/kauno
185
- md5: 43f49eabb66648de35d01d57d196e196.dir
186
- size: 33952771
187
- nfiles: 3
188
- - path: venv-installed
189
- md5: b1b83339d2df42e93f3df0e83b1e85ab
190
- size: 42
191
- outs:
192
- - path: data/projects/kauno-ks-parabel-fi
193
- md5: d24263f3ad1b5c3373ba6b1b86a4a222.dir
194
- size: 327667525
195
- nfiles: 6
196
- train-omikuji-finna@bonsai:
197
- cmd: venv/bin/annif train kauno-finna-bonsai-fi -j 8 corpora/finna/kauno-finna-fin.tsv.gz
198
- deps:
199
- - path: corpora/finna/kauno-finna-fin.tsv.gz
200
- md5: ae570f73cbc72bd952efebd67575cda4
201
- size: 63666506
202
- - path: data/vocabs/kauno
203
- md5: 43f49eabb66648de35d01d57d196e196.dir
204
- size: 33952771
205
- nfiles: 3
206
- - path: venv-installed
207
- md5: b1b83339d2df42e93f3df0e83b1e85ab
208
- size: 42
209
- outs:
210
- - path: data/projects/kauno-finna-bonsai-fi
211
- md5: 16418f34c02a34155b9db383138a8d87.dir
212
- size: 546792457
213
- nfiles: 6
214
- train-omikuji-finna@parabel:
215
- cmd: venv/bin/annif train kauno-finna-parabel-fi -j 8 corpora/finna/kauno-finna-fin.tsv.gz
216
- deps:
217
- - path: corpora/finna/kauno-finna-fin.tsv.gz
218
- md5: ae570f73cbc72bd952efebd67575cda4
219
- size: 63666506
220
- - path: data/vocabs/kauno
221
- md5: 43f49eabb66648de35d01d57d196e196.dir
222
- size: 33952771
223
- nfiles: 3
224
- - path: venv-installed
225
- md5: b1b83339d2df42e93f3df0e83b1e85ab
226
- size: 42
227
- outs:
228
- - path: data/projects/kauno-finna-parabel-fi
229
- md5: f952af490b34403547b9dd3204d63e15.dir
230
- size: 379427399
231
- nfiles: 6
232
- eval@finna-bonsai:
233
- cmd:
234
- - venv/bin/annif eval kauno-finna-bonsai-fi -j 8 -m F1@5 -m NDCG --metrics-file
235
- reports/kauno-finna-bonsai-fi.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
236
- deps:
237
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
238
- hash: md5
239
- md5: 2a491f3d2b35df602a3730db99f7605c
240
- size: 1502390
241
- - path: data/projects/kauno-finna-bonsai-fi
242
- hash: md5
243
- md5: eb1d5d6d196c0237832da37d53680b78.dir
244
- size: 542003428
245
- nfiles: 6
246
- - path: venv-installed
247
- hash: md5
248
- md5: f85f21b68735b126c2241fbff83fd0ef
249
- size: 41
250
- outs:
251
- - path: reports/kauno-finna-bonsai-fi.json
252
- hash: md5
253
- md5: fe5bc33b1f952c4d330ee1a12613fd9c
254
- size: 94
255
- eval@finna-parabel:
256
- cmd:
257
- - venv/bin/annif eval kauno-finna-parabel-fi -j 8 -m F1@5 -m NDCG --metrics-file
258
- reports/kauno-finna-parabel-fi.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
259
- deps:
260
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
261
- md5: 48611b60c8006116b9c8686456dcfbf6
262
- size: 1535757
263
- - path: data/projects/kauno-finna-parabel-fi
264
- md5: f952af490b34403547b9dd3204d63e15.dir
265
- size: 379427399
266
- nfiles: 6
267
- - path: venv-installed
268
- md5: b1b83339d2df42e93f3df0e83b1e85ab
269
- size: 42
270
- outs:
271
- - path: reports/kauno-finna-parabel-fi.json
272
- md5: e037704630021469933d06952c18d27e
273
- size: 95
274
- eval@ks-parabel:
275
- cmd:
276
- - venv/bin/annif eval kauno-ks-parabel-fi -j 8 -m F1@5 -m NDCG --metrics-file
277
- reports/kauno-ks-parabel-fi.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
278
- deps:
279
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
280
- md5: 48611b60c8006116b9c8686456dcfbf6
281
- size: 1535757
282
- - path: data/projects/kauno-ks-parabel-fi
283
- md5: d24263f3ad1b5c3373ba6b1b86a4a222.dir
284
- size: 327667525
285
- nfiles: 6
286
- - path: venv-installed
287
- md5: b1b83339d2df42e93f3df0e83b1e85ab
288
- size: 42
289
- outs:
290
- - path: reports/kauno-ks-parabel-fi.json
291
- md5: 5615be39b149e641186f32916d80b0a5
292
- size: 94
293
- train-omikuji-ks@bonsai:
294
- cmd: venv/bin/annif train kauno-ks-bonsai-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
295
- deps:
296
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
297
- md5: 6200fab3e9179186787cd49dbcaa587a
298
- size: 39921230
299
- - path: data/vocabs/kauno
300
- md5: 43f49eabb66648de35d01d57d196e196.dir
301
- size: 33952771
302
- nfiles: 3
303
- - path: venv-installed
304
- md5: b1b83339d2df42e93f3df0e83b1e85ab
305
- size: 42
306
- outs:
307
- - path: data/projects/kauno-ks-bonsai-fi
308
- md5: 3130bd53059af8c55630d94116657d4f.dir
309
- size: 426174615
310
- nfiles: 6
311
- eval@ks-bonsai:
312
- cmd:
313
- - venv/bin/annif eval kauno-ks-bonsai-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-ks-bonsai-fi.json
314
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
315
- deps:
316
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
317
- hash: md5
318
- md5: 2a491f3d2b35df602a3730db99f7605c
319
- size: 1502390
320
- - path: data/projects/kauno-ks-bonsai-fi
321
- hash: md5
322
- md5: 7f3dae80e066dae4428a86bba7eea413.dir
323
- size: 745367632
324
- nfiles: 6
325
- - path: venv-installed
326
- hash: md5
327
- md5: f85f21b68735b126c2241fbff83fd0ef
328
- size: 41
329
- outs:
330
- - path: reports/kauno-ks-bonsai-fi.json
331
- hash: md5
332
- md5: 9c16b8188723d2a92c8a4117e841fa83
333
- size: 94
334
- train-omikuji-ks:
335
- cmd: venv/bin/annif train kauno-ks-bonsai-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
336
- deps:
337
- - path: corpora/kirjasampo/kirjasampo-kauno-train.tsv
338
- hash: md5
339
- md5: 77c01592d2c7c34f04bbee75b6584ec8
340
- size: 58959362
341
- - path: data/vocabs/kauno
342
- hash: md5
343
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
344
- size: 34076325
345
- nfiles: 3
346
- - path: venv-installed
347
- hash: md5
348
- md5: f85f21b68735b126c2241fbff83fd0ef
349
- size: 41
350
- outs:
351
- - path: data/projects/kauno-ks-bonsai-fi
352
- hash: md5
353
- md5: 7f3dae80e066dae4428a86bba7eea413.dir
354
- size: 745367632
355
- nfiles: 6
356
- train-omikuji-finna:
357
- cmd: venv/bin/annif train kauno-finna-bonsai-fi -j 8 corpora/finna/kauno-finna-fin.tsv.gz
358
- deps:
359
- - path: corpora/finna/kauno-finna-fin.tsv.gz
360
- md5: ae570f73cbc72bd952efebd67575cda4
361
- size: 63666506
362
- - path: data/vocabs/kauno
363
- hash: md5
364
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
365
- size: 34076325
366
- nfiles: 3
367
- - path: venv-installed
368
- hash: md5
369
- md5: f85f21b68735b126c2241fbff83fd0ef
370
- size: 41
371
- outs:
372
- - path: data/projects/kauno-finna-bonsai-fi
373
- hash: md5
374
- md5: eb1d5d6d196c0237832da37d53680b78.dir
375
- size: 542003428
376
- nfiles: 6
377
- train-ensemble:
378
- cmd: venv/bin/annif train -j 8 kauno-ensemble-fi corpora/kirjasampo/kirjasampo-kauno-validate.tsv
379
- deps:
380
- - path: corpora/kirjasampo/kirjasampo-kauno-validate.tsv
381
- hash: md5
382
- md5: 6dbd774820c16dbd440da71151e36079
383
- size: 2455221
384
- - path: data/projects/kauno-finna-bonsai-fi
385
- hash: md5
386
- md5: eb1d5d6d196c0237832da37d53680b78.dir
387
- size: 542003428
388
- nfiles: 6
389
- - path: data/projects/kauno-ks-bonsai-fi
390
- hash: md5
391
- md5: 7f3dae80e066dae4428a86bba7eea413.dir
392
- size: 745367632
393
- nfiles: 6
394
- - path: data/projects/kauno-mllm-fi
395
- hash: md5
396
- md5: 23c57a221af59d0d2541a7df9e37ad7c.dir
397
- size: 2261733
398
- nfiles: 2
399
- - path: data/vocabs/kauno
400
- hash: md5
401
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
402
- size: 34076325
403
- nfiles: 3
404
- - path: venv-installed
405
- hash: md5
406
- md5: f85f21b68735b126c2241fbff83fd0ef
407
- size: 41
408
- outs:
409
- - path: data/projects/kauno-ensemble-fi
410
- hash: md5
411
- md5: 3c5b9e385b3d59c658f823a92dfd34f6.dir
412
- size: 1223621179
413
- nfiles: 3
414
- train-nn-ensemble:
415
- cmd: venv/bin/annif train -j 8 kauno-fi corpora/kirjasampo/kirjasampo-kauno-validate.tsv
416
- deps:
417
- - path: corpora/kirjasampo/kirjasampo-kauno-validate.tsv
418
- hash: md5
419
- md5: 6dbd774820c16dbd440da71151e36079
420
- size: 2455221
421
- - path: data/projects/kauno-finna-bonsai-fi
422
- hash: md5
423
- md5: eb1d5d6d196c0237832da37d53680b78.dir
424
- size: 542003428
425
- nfiles: 6
426
- - path: data/projects/kauno-ks-bonsai-fi
427
- hash: md5
428
- md5: 7f3dae80e066dae4428a86bba7eea413.dir
429
- size: 745367632
430
- nfiles: 6
431
- - path: data/projects/kauno-mllm-fi
432
- hash: md5
433
- md5: 23c57a221af59d0d2541a7df9e37ad7c.dir
434
- size: 2261733
435
- nfiles: 2
436
- - path: data/vocabs/kauno
437
- hash: md5
438
- md5: 8d23b787082ad96201ffb484ec6fb681.dir
439
- size: 34076325
440
- nfiles: 3
441
- - path: venv-installed
442
- hash: md5
443
- md5: f85f21b68735b126c2241fbff83fd0ef
444
- size: 41
445
- outs:
446
- - path: data/projects/kauno-fi
447
- hash: md5
448
- md5: ce989b5df6c6771b0e60aefb8a7d4ec9.dir
449
- size: 1223621179
450
- nfiles: 3
451
- eval@mllm-fi:
452
- cmd:
453
- - venv/bin/annif eval kauno-mllm-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-mllm-fi.json
454
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
455
- deps:
456
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
457
- hash: md5
458
- md5: 2a491f3d2b35df602a3730db99f7605c
459
- size: 1502390
460
- - path: data/projects/kauno-mllm-fi
461
- hash: md5
462
- md5: 23c57a221af59d0d2541a7df9e37ad7c.dir
463
- size: 2261733
464
- nfiles: 2
465
- - path: venv-installed
466
- hash: md5
467
- md5: f85f21b68735b126c2241fbff83fd0ef
468
- size: 41
469
- outs:
470
- - path: reports/kauno-mllm-fi.json
471
- hash: md5
472
- md5: 9b94207439de0dee3c8efb58eaf23bdb
473
- size: 95
474
- eval@ks-bonsai-fi:
475
- cmd:
476
- - venv/bin/annif eval kauno-ks-bonsai-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-ks-bonsai-fi.json
477
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
478
- deps:
479
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
480
- hash: md5
481
- md5: 2a491f3d2b35df602a3730db99f7605c
482
- size: 1502390
483
- - path: data/projects/kauno-ks-bonsai-fi
484
- hash: md5
485
- md5: 7f3dae80e066dae4428a86bba7eea413.dir
486
- size: 745367632
487
- nfiles: 6
488
- - path: venv-installed
489
- hash: md5
490
- md5: f85f21b68735b126c2241fbff83fd0ef
491
- size: 41
492
- outs:
493
- - path: reports/kauno-ks-bonsai-fi.json
494
- hash: md5
495
- md5: 9c16b8188723d2a92c8a4117e841fa83
496
- size: 94
497
- eval@finna-bonsai-fi:
498
- cmd:
499
- - venv/bin/annif eval kauno-finna-bonsai-fi -j 8 -m F1@5 -m NDCG --metrics-file
500
- reports/kauno-finna-bonsai-fi.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
501
- deps:
502
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
503
- hash: md5
504
- md5: 2a491f3d2b35df602a3730db99f7605c
505
- size: 1502390
506
- - path: data/projects/kauno-finna-bonsai-fi
507
- hash: md5
508
- md5: eb1d5d6d196c0237832da37d53680b78.dir
509
- size: 542003428
510
- nfiles: 6
511
- - path: venv-installed
512
- hash: md5
513
- md5: f85f21b68735b126c2241fbff83fd0ef
514
- size: 41
515
- outs:
516
- - path: reports/kauno-finna-bonsai-fi.json
517
- hash: md5
518
- md5: fe5bc33b1f952c4d330ee1a12613fd9c
519
- size: 94
520
- eval@fi:
521
- cmd:
522
- - venv/bin/annif eval kauno-fi -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-fi.json
523
- corpora/kirjasampo/kirjasampo-kauno-test.tsv
524
- deps:
525
- - path: corpora/kirjasampo/kirjasampo-kauno-test.tsv
526
- hash: md5
527
- md5: 2a491f3d2b35df602a3730db99f7605c
528
- size: 1502390
529
- - path: data/projects/kauno-fi
530
- hash: md5
531
- md5: ce989b5df6c6771b0e60aefb8a7d4ec9.dir
532
- size: 1223621179
533
- nfiles: 3
534
- - path: venv-installed
535
- hash: md5
536
- md5: f85f21b68735b126c2241fbff83fd0ef
537
- size: 41
538
- outs:
539
- - path: reports/kauno-fi.json
540
- hash: md5
541
- md5: 500d93c3d47823dbef945b2efa8899e4
542
- size: 93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dvc.yaml DELETED
@@ -1,76 +0,0 @@
1
- stages:
2
- # Ensure Annif is installed
3
- install:
4
- cmd:
5
- - python3 -m venv venv
6
- - . venv/bin/activate && pip install -U pip wheel setuptools && pip install -r requirements.txt
7
- - cp requirements.txt venv-installed
8
- deps:
9
- - requirements.txt
10
- outs:
11
- - venv-installed:
12
- cache: false
13
- # Load KAUNO vocabulary
14
- loadvoc:
15
- cmd: venv/bin/annif load-vocab --force kauno corpora/kauno-skos-reduced.ttl
16
- deps:
17
- - venv-installed
18
- - corpora/kauno-skos-reduced.ttl
19
- outs:
20
- - data/vocabs/kauno
21
- # Train MLLM project
22
- train-mllm:
23
- cmd: venv/bin/annif train kauno-mllm-fi -j 8 -d 2000 corpora/kirjasampo/kirjasampo-kauno-train.tsv
24
- deps:
25
- - venv-installed
26
- - corpora/kirjasampo/kirjasampo-kauno-train.tsv
27
- - data/vocabs/kauno
28
- outs:
29
- - data/projects/kauno-mllm-fi
30
- # Train Omikuji project using Kirjasampo data
31
- train-omikuji-ks:
32
- cmd: venv/bin/annif train kauno-ks-bonsai-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
33
- deps:
34
- - venv-installed
35
- - corpora/kirjasampo/kirjasampo-kauno-train.tsv
36
- - data/vocabs/kauno
37
- outs:
38
- - data/projects/kauno-ks-bonsai-fi
39
- # Train Omikuji project using Finna data
40
- train-omikuji-finna:
41
- cmd: venv/bin/annif train kauno-finna-bonsai-fi -j 8 corpora/finna/kauno-finna-fin.tsv.gz
42
- deps:
43
- - venv-installed
44
- - corpora/finna/kauno-finna-fin.tsv.gz
45
- - data/vocabs/kauno
46
- outs:
47
- - data/projects/kauno-finna-bonsai-fi
48
- # Train NN ensemble
49
- train-nn-ensemble:
50
- cmd: venv/bin/annif train -j 8 kauno-fi corpora/kirjasampo/kirjasampo-kauno-validate.tsv
51
- deps:
52
- - venv-installed
53
- - corpora/kirjasampo/kirjasampo-kauno-validate.tsv
54
- - data/vocabs/kauno
55
- - data/projects/kauno-mllm-fi
56
- - data/projects/kauno-ks-bonsai-fi
57
- - data/projects/kauno-finna-bonsai-fi
58
- outs:
59
- - data/projects/kauno-fi
60
- # Evaluate projects
61
- eval:
62
- foreach:
63
- - mllm-fi
64
- - ks-bonsai-fi
65
- - finna-bonsai-fi
66
- - fi
67
- do:
68
- cmd:
69
- - venv/bin/annif eval kauno-${item} -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-${item}.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
70
- deps:
71
- - venv-installed
72
- - corpora/kirjasampo/kirjasampo-kauno-test.tsv
73
- - data/projects/kauno-${item}
74
- metrics:
75
- - reports/kauno-${item}.json:
76
- cache: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
projects.d/2-projects-kauno.toml DELETED
@@ -1,44 +0,0 @@
1
- # KAUNO Finnish
2
-
3
- [kauno-mllm-fi]
4
- name="KAUNO MLLM Finnish"
5
- language="fi"
6
- backend="mllm"
7
- analyzer="voikko(fi)"
8
- vocab="kauno"
9
- access="hidden"
10
-
11
- [kauno-ks-bonsai-fi]
12
- name="KAUNO KS Omikuji Bonsai Finnish"
13
- language="fi"
14
- backend="omikuji"
15
- analyzer="voikko(fi)"
16
- vocab="kauno"
17
- cluster_balanced=false
18
- cluster_k=100
19
- max_depth=3
20
- min_df=2
21
- ngram=2
22
- transform="limit(5000)"
23
- access="hidden"
24
-
25
- [kauno-finna-bonsai-fi]
26
- name="KAUNO Finna Omikuji Bonsai Finnish"
27
- language="fi"
28
- backend="omikuji"
29
- analyzer="voikko(fi)"
30
- vocab="kauno"
31
- cluster_balanced=false
32
- cluster_k=100
33
- max_depth=3
34
- min_df=2
35
- ngram=2
36
- transform="limit(5000)"
37
- access="hidden"
38
-
39
- [kauno-fi]
40
- name="KAUNO suomi"
41
- language="fi"
42
- backend="nn_ensemble"
43
- sources="kauno-mllm-fi:0.1293,kauno-ks-bonsai-fi:0.5518,kauno-finna-bonsai-fi:0.3189"
44
- vocab="kauno"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
projects.toml DELETED
@@ -1,44 +0,0 @@
1
- # KAUNO Finnish
2
-
3
- [kauno-mllm-fi]
4
- name="KAUNO MLLM Finnish"
5
- language="fi"
6
- backend="mllm"
7
- analyzer="voikko(fi)"
8
- vocab="kauno"
9
- access="hidden"
10
-
11
- [kauno-ks-bonsai-fi]
12
- name="KAUNO KS Omikuji Bonsai Finnish"
13
- language="fi"
14
- backend="omikuji"
15
- analyzer="voikko(fi)"
16
- vocab="kauno"
17
- cluster_balanced=false
18
- cluster_k=100
19
- max_depth=3
20
- min_df=2
21
- ngram=2
22
- transform="limit(5000)"
23
- access="hidden"
24
-
25
- [kauno-finna-bonsai-fi]
26
- name="KAUNO Finna Omikuji Bonsai Finnish"
27
- language="fi"
28
- backend="omikuji"
29
- analyzer="voikko(fi)"
30
- vocab="kauno"
31
- cluster_balanced=false
32
- cluster_k=100
33
- max_depth=3
34
- min_df=2
35
- ngram=2
36
- transform="limit(5000)"
37
- access="hidden"
38
-
39
- [kauno-fi]
40
- name="KAUNO suomi"
41
- language="fi"
42
- backend="nn_ensemble"
43
- sources="kauno-mllm-fi:0.1293,kauno-ks-bonsai-fi:0.5518,kauno-finna-bonsai-fi:0.3189"
44
- vocab="kauno"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
reports/kauno-fi.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "F1@5": 0.4662634707739284,
3
- "NDCG": 0.6133524179458618,
4
- "Documents_evaluated": 1367
5
- }
 
 
 
 
 
 
reports/kauno-finna-bonsai-fi.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "F1@5": 0.2873918358957718,
3
- "NDCG": 0.38556820154190063,
4
- "Documents_evaluated": 1367
5
- }
 
 
 
 
 
 
reports/kauno-ks-bonsai-fi.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "F1@5": 0.39796013215991816,
3
- "NDCG": 0.5301554799079895,
4
- "Documents_evaluated": 1367
5
- }
 
 
 
 
 
 
reports/kauno-mllm-fi.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "F1@5": 0.18675745352815562,
3
- "NDCG": 0.24778875708580017,
4
- "Documents_evaluated": 1367
5
- }
 
 
 
 
 
 
requirements.txt DELETED
@@ -1 +0,0 @@
1
- annif[fasttext,omikuji,nn,voikko]==1.0.*
 
 
sync-model-data-ocp.sh DELETED
@@ -1,32 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Runs rsync to transfer model data from the current directory to an OpenShift volume
4
- # that is attached to a pod which is running Annif. The instance
5
- # {api-annif-org,ai-finto-fi, etc.} to transfer to is given as the argument.
6
- # You need to be logged to the cluster with the oc tool.
7
-
8
- set -e
9
-
10
- if [ $# -ne 1 ]
11
- then
12
- echo "Not enough arguments; argument 1: destination_instance"
13
- exit 1
14
- fi
15
-
16
- pod=$(oc get pods -l app.kubernetes.io/instance=$1,app.kubernetes.io/name=annif -o name)
17
-
18
- if [[ $pod = *[[:space:]]* ]]
19
- then
20
- echo "Multiple pod exists; using first"
21
- pod=(${pod//$'\n'/ })
22
- fi
23
- echo "Target is "$pod
24
- pod=${pod#pod/}
25
- if [ -z "${pod}" ]
26
- then
27
- echo "No target pod found"
28
- exit 1
29
- fi
30
-
31
- rsync --rsh='oc rsh' -avrL --exclude="*train*" --exclude="*zip" --inplace projects.d $pod:/annif-projects
32
- rsync --rsh='oc rsh' -avrL --exclude="*train*" --exclude="*zip" --inplace data/{projects,vocabs} $pod:/annif-projects/data