Spaces:
Build error
Build error
File size: 1,000 Bytes
ff7710f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# Entity-level tags: spans produced by CSL that do not overlap one another,
# so they can serve directly as annotations for the NER task.
# Both lower- and upper-case spellings of "url"/"doi" are listed on purpose-or-not
# exactly as in the data; they are distinct strings.
tags_ent = [
    # citation identifiers
    "citation-number", "citation-label",
    # name parts
    "family", "given",
    # core bibliographic fields
    "title", "container-title", "issued", "url", "publisher", "page", "doi",
    "publisher-place", "number-of-pages",
    "collection-title", "collection-number",
    "genre", "authority",
    "URL", "DOI",
    "volume",
    # "title-short" is a valid tag too, but it is left out because the dataset
    # ended up containing only a single occurrence of it.
    "number", "note",
    "archive", "archive_location",
]
# Span-level tags: spans that may enclose other annotated spans. spaCy can keep
# overlapping spans in doc.spans, so these do not have to be disjoint.
# The list is the span-only tags followed by every entity tag from tags_ent.
# NOTE(review): "issued" and "url" are listed here and also in tags_ent, so each
# occurs twice in the resulting list — confirm whether that is intended before
# de-duplicating, since doing so would shift the positions of later entries.
tags_span = ["author", "year", "month", "day", "issued", "url", "bib", *tags_ent]
# Tag used to add sentence-boundary annotations: an annotated CSL style wraps
# every bibliography item in <bib>..</bib>.
tag_sentence_start = "bib"

# Key paired with the sentence-start tag above.
# NOTE(review): presumably the doc.spans key under which sentence-start spans
# are stored — confirm against the code that consumes it.
spankey_sentence_start = "sc"
|