{ "lang": "da", "name": "dacy_medium_trf", "version": "0.2.0", "description": "\n\n\n# DaCy medium\n\nDaCy is a Danish language processing framework with state-of-the-art pipelines as well as functionality for analysing Danish pipelines.\nDaCy's largest pipeline has achieved state-of-the-art performance on part-of-speech tagging and dependency \nparsing for Danish on the DaNE dataset. To read more, check out the [DaCy repository](https://github.com/centre-for-humanities-computing/DaCy) for material on how to use DaCy and reproduce the results. \nDaCy also contains guides on usage of the package as well as behavioural tests for biases and robustness of Danish NLP pipelines.\n", "author": "Kenneth Enevoldsen", "email": "Kenneth.enevoldsen@cas.au.dk", "url": "https://chcaa.io/#/", "license": "Apache-2.0", "spacy_version": ">=3.5.2,<3.6.0", "spacy_git_version": "Unknown", "vectors": { "width": 0, "vectors": 0, "keys": 0, "name": null }, "labels": { "transformer": [], "tagger": [ "ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ", "NOUN", "NUM", "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X" ], "morphologizer": [ "AdpType=Prep|POS=ADP", "Definite=Ind|Gender=Com|Number=Sing|POS=NOUN", "Mood=Ind|POS=AUX|Tense=Pres|VerbForm=Fin|Voice=Act", "POS=PROPN", "Definite=Ind|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part", "Definite=Def|Gender=Neut|Number=Sing|POS=NOUN", "POS=SCONJ", "Definite=Def|Gender=Com|Number=Sing|POS=NOUN", "Mood=Ind|POS=VERB|Tense=Pres|VerbForm=Fin|Voice=Act", "POS=ADV", "Number=Plur|POS=DET|PronType=Dem", "Degree=Pos|Number=Plur|POS=ADJ", "Definite=Ind|Gender=Com|Number=Plur|POS=NOUN", "POS=PUNCT", "NumType=Ord|POS=ADJ", "POS=CCONJ", "Definite=Ind|Gender=Neut|Number=Plur|POS=NOUN", "POS=VERB|VerbForm=Inf|Voice=Act", "Case=Acc|Gender=Neut|Number=Sing|POS=PRON|Person=3|PronType=Prs", "Degree=Sup|POS=ADV", "Degree=Pos|POS=ADV", "Gender=Com|Number=Sing|POS=DET|PronType=Ind", "Number=Plur|POS=DET|PronType=Ind", "POS=ADP", "POS=ADV|PartType=Inf", 
"Case=Nom|Gender=Com|Number=Sing|POS=PRON|Person=3|PronType=Prs", "Mood=Ind|POS=AUX|Tense=Past|VerbForm=Fin|Voice=Act", "Definite=Def|Degree=Pos|Number=Sing|POS=ADJ", "Number[psor]=Sing|POS=DET|Person=3|Poss=Yes|PronType=Prs", "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin|Voice=Act", "POS=ADP|PartType=Inf", "Definite=Ind|Degree=Pos|Gender=Com|Number=Sing|POS=ADJ", "NumType=Card|POS=NUM", "Degree=Pos|POS=ADJ", "Definite=Ind|Number=Sing|POS=AUX|Tense=Past|VerbForm=Part", "POS=PART|PartType=Inf", "Case=Acc|POS=PRON|Person=3|PronType=Prs|Reflex=Yes", "Definite=Def|Gender=Com|Number=Plur|POS=NOUN", "Definite=Ind|Gender=Neut|Number=Sing|POS=NOUN", "Number[psor]=Plur|POS=DET|Person=3|Poss=Yes|PronType=Prs", "POS=VERB|Tense=Pres|VerbForm=Part", "Case=Nom|Number=Plur|POS=PRON|Person=3|PronType=Prs", "Case=Gen|Definite=Def|Gender=Com|Number=Sing|POS=NOUN", "Definite=Def|Degree=Sup|Number=Plur|POS=ADJ", "Case=Acc|Number=Plur|POS=PRON|Person=3|PronType=Prs", "POS=AUX|VerbForm=Inf|Voice=Act", "Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing|POS=ADJ", "Definite=Ind|Degree=Cmp|Number=Sing|POS=ADJ", "Degree=Cmp|POS=ADJ", "POS=PRON|PartType=Inf", "Definite=Ind|Degree=Pos|Number=Sing|POS=ADJ", "Case=Nom|Gender=Com|POS=PRON|PronType=Ind", "Number=Plur|POS=PRON|PronType=Ind", "POS=INTJ", "Gender=Com|Number=Sing|POS=DET|PronType=Dem", "Case=Gen|Number=Plur|POS=DET|PronType=Ind", "Mood=Ind|POS=VERB|Tense=Pres|VerbForm=Fin|Voice=Pass", "Definite=Def|Gender=Neut|Number=Plur|POS=NOUN", "Degree=Cmp|POS=ADV", "Number=Plur|Number[psor]=Plur|POS=PRON|Person=1|Poss=Yes|PronType=Prs|Style=Form", "Case=Acc|Gender=Com|Number=Sing|POS=PRON|Person=3|PronType=Prs", "Number=Plur|Number[psor]=Sing|POS=DET|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes", "Case=Gen|POS=PROPN", "Gender=Neut|Number=Sing|POS=PRON|PronType=Ind", "Number=Plur|POS=VERB|Tense=Past|VerbForm=Part", "Gender=Neut|Number=Sing|Number[psor]=Sing|POS=DET|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes", 
"Case=Acc|Gender=Com|Number=Sing|POS=PRON|Person=1|PronType=Prs", "Definite=Def|Degree=Sup|POS=ADJ", "Gender=Neut|Number=Sing|POS=DET|PronType=Ind", "Case=Gen|Definite=Ind|Gender=Neut|Number=Sing|POS=NOUN", "Gender=Neut|Number=Sing|POS=DET|PronType=Dem", "Definite=Def|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part", "POS=PRON|PronType=Dem", "Degree=Pos|Gender=Com|Number=Sing|POS=ADJ", "Number=Plur|POS=NUM", "POS=VERB|VerbForm=Inf|Voice=Pass", "Definite=Def|Degree=Sup|Number=Sing|POS=ADJ", "Number=Sing|POS=PRON|PronType=Int,Rel", "Case=Nom|Gender=Com|Number=Sing|POS=PRON|Person=1|PronType=Prs", "Gender=Neut|Number=Sing|Number[psor]=Sing|POS=DET|Person=1|Poss=Yes|PronType=Prs", "Gender=Com|Number=Sing|Number[psor]=Sing|POS=DET|Person=1|Poss=Yes|PronType=Prs", "POS=PRON", "Definite=Ind|Number=Sing|POS=NOUN", "Definite=Ind|Number=Sing|POS=NUM", "Case=Gen|Definite=Ind|Gender=Com|Number=Sing|POS=NOUN", "Foreign=Yes|POS=ADV", "POS=NOUN", "Case=Gen|Definite=Def|Gender=Neut|Number=Sing|POS=NOUN", "Gender=Com|Number=Plur|POS=NOUN", "Gender=Neut|Number=Sing|POS=PRON|PronType=Int,Rel", "Case=Nom|Gender=Com|Number=Plur|POS=PRON|Person=1|PronType=Prs", "Number[psor]=Plur|POS=DET|Person=1|Poss=Yes|PronType=Prs", "Gender=Com|Number=Sing|POS=PRON|PronType=Ind", "Case=Gen|Definite=Ind|Gender=Com|Number=Plur|POS=NOUN", "Degree=Pos|Gender=Neut|Number=Sing|POS=ADJ", "Degree=Sup|POS=ADJ", "Degree=Pos|Number=Sing|POS=ADJ", "Mood=Imp|POS=VERB", "Case=Nom|Gender=Com|POS=PRON|Person=2|Polite=Form|PronType=Prs", "Case=Acc|Gender=Com|POS=PRON|Person=2|Polite=Form|PronType=Prs", "POS=X", "Case=Gen|Definite=Def|Gender=Com|Number=Plur|POS=NOUN", "Number=Plur|POS=PRON|PronType=Dem", "Case=Acc|Gender=Com|Number=Plur|POS=PRON|Person=1|PronType=Prs", "Number=Plur|POS=PRON|PronType=Int,Rel", "Gender=Com|Number=Sing|Number[psor]=Sing|POS=DET|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes", "Degree=Cmp|Number=Plur|POS=ADJ", "Number=Plur|Number[psor]=Sing|POS=DET|Person=1|Poss=Yes|PronType=Prs", 
"Gender=Com|Number=Sing|Number[psor]=Plur|POS=DET|Person=1|Poss=Yes|PronType=Prs|Style=Form", "Case=Nom|Gender=Com|Number=Sing|POS=PRON|Person=2|PronType=Prs", "Case=Acc|Gender=Com|Number=Sing|POS=PRON|Person=2|PronType=Prs", "Gender=Com|POS=PRON|PronType=Int,Rel", "Case=Gen|Degree=Pos|Number=Plur|POS=ADJ", "Gender=Neut|Number=Sing|Number[psor]=Sing|POS=PRON|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes", "POS=VERB|VerbForm=Ger", "Gender=Com|Number=Sing|POS=PRON|PronType=Dem", "Case=Gen|POS=PRON|PronType=Int,Rel", "Mood=Ind|POS=VERB|Tense=Past|VerbForm=Fin|Voice=Pass", "Abbr=Yes|POS=X", "Case=Gen|Definite=Ind|Gender=Neut|Number=Plur|POS=NOUN", "Gender=Com|Number=Sing|Number[psor]=Sing|POS=DET|Person=2|Poss=Yes|PronType=Prs", "Definite=Ind|Number=Plur|POS=NOUN", "Foreign=Yes|POS=X", "Number=Plur|POS=PRON|PronType=Rcp", "Case=Nom|Gender=Com|Number=Plur|POS=PRON|Person=2|PronType=Prs", "Case=Gen|Degree=Cmp|POS=ADJ", "Case=Gen|Definite=Def|Gender=Neut|Number=Plur|POS=NOUN", "Case=Acc|Gender=Com|Number=Plur|POS=PRON|Person=2|PronType=Prs", "Gender=Neut|Number=Sing|POS=PRON|PronType=Dem", "Number=Plur|Number[psor]=Plur|POS=DET|Person=1|Poss=Yes|PronType=Prs|Style=Form", "Gender=Neut|Number=Sing|Number[psor]=Plur|POS=DET|Person=1|Poss=Yes|PronType=Prs|Style=Form", "Number=Plur|Number[psor]=Sing|POS=PRON|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes", "Number[psor]=Sing|POS=PRON|Person=3|Poss=Yes|PronType=Prs", "Case=Gen|Number=Plur|POS=PRON|PronType=Rcp", "POS=DET|Person=2|Polite=Form|Poss=Yes|PronType=Prs", "POS=SYM", "POS=DET|PronType=Dem", "Gender=Com|Number=Sing|POS=NUM", "Number[psor]=Plur|POS=DET|Person=2|Poss=Yes|PronType=Prs", "Case=Gen|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part", "Definite=Def|Degree=Abs|POS=ADJ", "POS=VERB|Tense=Pres", "Definite=Ind|Gender=Neut|Number=Sing|POS=NUM", "Degree=Abs|POS=ADV", "Case=Gen|Definite=Def|Degree=Pos|Number=Sing|POS=ADJ", "Gender=Com|Number=Sing|POS=PRON|PronType=Int,Rel", "POS=VERB|Tense=Past|VerbForm=Part", 
"Definite=Ind|Degree=Sup|Number=Sing|POS=ADJ", "Gender=Neut|Number=Sing|Number[psor]=Sing|POS=DET|Person=2|Poss=Yes|PronType=Prs", "Gender=Com|Number=Sing|Number[psor]=Sing|POS=PRON|Person=1|Poss=Yes|PronType=Prs", "Number=Plur|Number[psor]=Sing|POS=DET|Person=2|Poss=Yes|PronType=Prs", "Number[psor]=Plur|POS=PRON|Person=3|Poss=Yes|PronType=Prs", "Definite=Ind|POS=NOUN", "Case=Gen|Gender=Com|Number=Sing|POS=DET|PronType=Ind", "Definite=Ind|Gender=Com|Number=Sing|POS=NUM", "Definite=Def|Number=Plur|POS=NOUN", "Case=Gen|POS=NOUN", "POS=AUX|Tense=Pres|VerbForm=Part" ], "parser": [ "ROOT", "acl:relcl", "advcl", "advmod", "advmod:lmod", "amod", "appos", "aux", "case", "cc", "ccomp", "compound:prt", "conj", "cop", "dep", "det", "expl", "fixed", "flat", "iobj", "list", "mark", "nmod", "nmod:poss", "nsubj", "nummod", "obj", "obl", "obl:lmod", "obl:tmod", "punct", "xcomp" ], "ner": [ "LOC", "MISC", "ORG", "PER" ], "coref": [], "span_resolver": [], "entity_linker": [] }, "pipeline": [ "transformer", "tagger", "morphologizer", "trainable_lemmatizer", "parser", "ner", "coref", "span_resolver", "span_cleaner", "entity_linker" ], "components": [ "transformer", "tagger", "morphologizer", "trainable_lemmatizer", "parser", "ner", "coref", "span_resolver", "span_cleaner", "entity_linker" ], "disabled": [], "requirements": [ "spacy-transformers>=1.2.3,<1.3.0", "spacy-experimental>=0.6.2,<0.7.0" ], "performance": { "token_acc": 0.9992023928, "token_p": 0.9970089731, "token_r": 0.9977052779, "token_f": 0.9973570039, "sents_p": 0.9842105263, "sents_r": 0.992920354, "sents_f": 0.9885462555, "tag_acc": 0.9847290149, "pos_acc": 0.985677928, "morph_acc": 0.9814371257, "morph_micro_p": 0.9910058542, "morph_micro_r": 0.9876942662, "morph_micro_f": 0.989347289, "morph_per_feat": { "NumType": { "p": 0.987654321, "r": 0.9302325581, "f": 0.9580838323 }, "Degree": { "p": 0.9894736842, "r": 0.9715762274, "f": 0.9804432855 }, "Number": { "p": 0.9884148064, "r": 0.9859075536, "f": 0.987159588 }, 
"Definite": { "p": 0.9858490566, "r": 0.9837398374, "f": 0.9847933176 }, "Gender": { "p": 0.9869901547, "r": 0.9838766211, "f": 0.9854309286 }, "Mood": { "p": 0.9971126083, "r": 0.9942418426, "f": 0.9956751562 }, "Tense": { "p": 0.9906469213, "r": 0.9906469213, "f": 0.9906469213 }, "VerbForm": { "p": 0.9924670433, "r": 0.9918444166, "f": 0.9921556323 }, "Voice": { "p": 0.997012696, "r": 0.9955257271, "f": 0.9962686567 }, "AdpType": { "p": 0.9990689013, "r": 0.9972118959, "f": 0.9981395349 }, "PronType": { "p": 0.9954914337, "r": 0.9963898917, "f": 0.9959404601 }, "Case": { "p": 0.9968652038, "r": 0.9860465116, "f": 0.9914263445 }, "Person": { "p": 0.9930555556, "r": 0.9913344887, "f": 0.9921942758 }, "Number[psor]": { "p": 0.987804878, "r": 1.0, "f": 0.9938650307 }, "Poss": { "p": 0.987804878, "r": 1.0, "f": 0.9938650307 }, "PartType": { "p": 1.0, "r": 0.9962406015, "f": 0.9981167608 }, "Polite": { "p": 0.6666666667, "r": 0.6666666667, "f": 0.6666666667 }, "Reflex": { "p": 1.0, "r": 1.0, "f": 1.0 }, "Foreign": { "p": 0.5, "r": 0.2, "f": 0.2857142857 }, "Style": { "p": 1.0, "r": 1.0, "f": 1.0 }, "Abbr": { "p": 0.6666666667, "r": 1.0, "f": 0.8 } }, "dep_uas": 0.9083920564, "dep_las": 0.883349834, "dep_las_per_type": { "nummod": { "p": 0.7948717949, "r": 0.8230088496, "f": 0.8086956522 }, "amod": { "p": 0.897810219, "r": 0.9027522936, "f": 0.9002744739 }, "nmod": { "p": 0.7712418301, "r": 0.7729257642, "f": 0.772082879 }, "nsubj": { "p": 0.9510638298, "r": 0.946031746, "f": 0.9485411141 }, "flat": { "p": 0.9285714286, "r": 0.9680851064, "f": 0.9479166667 }, "cc": { "p": 0.8681672026, "r": 0.8940397351, "f": 0.88091354 }, "conj": { "p": 0.8862275449, "r": 0.8554913295, "f": 0.8705882353 }, "root": { "p": 0.926056338, "r": 0.9309734513, "f": 0.9285083848 }, "advmod": { "p": 0.8871715611, "r": 0.8605697151, "f": 0.8736681887 }, "mark": { "p": 0.9148471616, "r": 0.9331848552, "f": 0.9239250276 }, "aux": { "p": 0.9875389408, "r": 0.9753846154, "f": 0.9814241486 }, "ccomp": 
{ "p": 0.7764705882, "r": 0.835443038, "f": 0.8048780488 }, "case": { "p": 0.9348986126, "r": 0.9192025184, "f": 0.926984127 }, "det": { "p": 0.9409448819, "r": 0.9637096774, "f": 0.9521912351 }, "obl": { "p": 0.8476821192, "r": 0.8114104596, "f": 0.8291497976 }, "nmod:poss": { "p": 0.8181818182, "r": 0.8256880734, "f": 0.8219178082 }, "obj": { "p": 0.8943533698, "r": 0.9352380952, "f": 0.9143389199 }, "cop": { "p": 0.8944099379, "r": 0.8834355828, "f": 0.8888888889 }, "acl:relcl": { "p": 0.8343195266, "r": 0.7704918033, "f": 0.8011363636 }, "advcl": { "p": 0.6742857143, "r": 0.7564102564, "f": 0.7129909366 }, "dep": { "p": 0.1136363636, "r": 0.3333333333, "f": 0.1694915254 }, "compound:prt": { "p": 0.6666666667, "r": 0.5882352941, "f": 0.625 }, "fixed": { "p": 0.9473684211, "r": 0.8709677419, "f": 0.9075630252 }, "iobj": { "p": 0.7692307692, "r": 0.6666666667, "f": 0.7142857143 }, "appos": { "p": 0.8181818182, "r": 0.7105263158, "f": 0.7605633803 }, "obl:tmod": { "p": 0.5, "r": 0.3125, "f": 0.3846153846 }, "advmod:lmod": { "p": 0.7678571429, "r": 0.8958333333, "f": 0.8269230769 }, "xcomp": { "p": 0.8913043478, "r": 0.640625, "f": 0.7454545455 }, "expl": { "p": 0.9230769231, "r": 0.9230769231, "f": 0.9230769231 }, "list": { "p": 0.5714285714, "r": 0.2352941176, "f": 0.3333333333 }, "obl:lmod": { "p": 0.25, "r": 0.3333333333, "f": 0.2857142857 }, "parataxis": { "p": 0.0, "r": 0.0, "f": 0.0 }, "orphan": { "p": 0.0, "r": 0.0, "f": 0.0 }, "vocative": { "p": 0.0, "r": 0.0, "f": 0.0 }, "discourse": { "p": 0.0, "r": 0.0, "f": 0.0 }, "dislocated": { "p": 0.0, "r": 0.0, "f": 0.0 }, "compound": { "p": 0.0, "r": 0.0, "f": 0.0 } }, "ents_p": 0.8708487085, "ents_r": 0.8458781362, "ents_f": 0.8581818182, "ents_per_type": { "LOC": { "p": 0.854368932, "r": 0.9166666667, "f": 0.8844221106 }, "PER": { "p": 0.9100529101, "r": 0.9555555556, "f": 0.9322493225 }, "MISC": { "p": 0.8301886792, "r": 0.7272727273, "f": 0.7753303965 }, "ORG": { "p": 0.8611111111, "r": 0.7701863354, "f": 
0.8131147541 } }, "coref_lea_f1": 0.4118366346, "coref_lea_precision": 0.4889169083, "coref_lea_recall": 0.3557507008, "nel_score": 0.801242236, "nel_score_desc": "micro F", "nel_micro_p": 0.9923076923, "nel_micro_r": 0.671875, "nel_micro_f": 0.801242236, "nel_macro_p": 0.993902439, "nel_macro_r": 0.6598989464, "nel_macro_f": 0.7815238616, "nel_f_per_type": { "MISC": { "p": 1.0, "r": 0.4117647059, "f": 0.5833333333 }, "PER": { "p": 1.0, "r": 0.7540983607, "f": 0.8598130841 }, "LOC": { "p": 1.0, "r": 0.8285714286, "f": 0.90625 }, "ORG": { "p": 0.9756097561, "r": 0.6451612903, "f": 0.7766990291 } } }, "sources": [ { "name": "UD Danish DDT v2.11", "url": "https://github.com/UniversalDependencies/UD_Danish-DDT", "license": "CC BY-SA 4.0", "author": "Johannsen, Anders; Mart\u00ednez Alonso, H\u00e9ctor; Plank, Barbara" }, { "name": "DaNE", "url": "https://huggingface.co/datasets/dane", "license": "CC BY-SA 4.0", "author": "Rasmus Hvingelby, Amalie B. Pauli, Maria Barrett, Christina Rosted, Lasse M. Lidegaard, Anders S\u00f8gaard" }, { "name": "DaCoref", "url": "https://huggingface.co/datasets/alexandrainst/dacoref", "license": "CC BY-SA 4.0", "author": "Buch-Kromann, Matthias" }, { "name": "DaNED", "url": "https://danlp-alexandra.readthedocs.io/en/stable/docs/datasets.html#daned", "license": "CC BY-SA 4.0", "author": "Barrett, M. J., Lam, H., Wu, M., Lacroix, O., Plank, B., & S\u00f8gaard, A." }, { "name": "vesteinn/DanskBERT", "author": "V\u00e9steinn Sn\u00e6bjarnarson", "url": "https://huggingface.co/vesteinn/DanskBERT", "license": "MIT" } ], "notes": "\n\n### Training\nThis model was trained using [spaCy](https://spacy.io) and logged to [Weights & Biases](https://wandb.ai/kenevoldsen/dacy-v0.2.0). You can find all the training logs [here](https://wandb.ai/kenevoldsen/dacy-v0.2.0)." }