# Tag names used when reading CSL-annotated bibliography text.

# Non-overlapping spans generated by CSL. They can be treated as annotations
# for the NER task.
# NOTE(review): "url"/"URL" and "doi"/"DOI" are separate, case-distinct
# entries; presumably tag matching is case-sensitive -- confirm with consumers.
tags_ent = [
    "citation-number",
    "citation-label",
    "family",
    "given",
    "title",
    "container-title",
    "issued",
    "url",
    "publisher",
    "page",
    "doi",
    "publisher-place",
    "number-of-pages",
    "collection-title",
    "collection-number",
    "genre",
    "authority",
    "URL",
    "DOI",
    "volume",
    # "title-short" is a valid tag, but it occurred only once in the dataset,
    # so it is left out.
    "number",
    "note",
    "archive",
    "archive_location",
]

# Spans that may enclose other annotated spans. spaCy allows overlapping spans
# to be stored in doc.spans.
# NOTE(review): "issued" and "url" are listed here and again in tags_ent, so
# each appears twice in the concatenated list. The value is kept unchanged;
# confirm whether consumers expect unique tags before de-duplicating.
tags_span = [
    "author",
    "year",
    "month",
    "day",
    "issued",
    "url",
    "bib",
] + tags_ent

# Span tag used for adding sentence-boundary annotations: an annotated CSL
# style encloses each bibliography item with <bib>..</bib>.
tag_sentence_start = "bib"
# NOTE(review): presumably the doc.spans key under which sentence-start spans
# are stored -- confirm against the code that reads it.
spankey_sentence_start = "sc"