Spaces:
Sleeping
Sleeping
File size: 1,665 Bytes
626eca0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from __future__ import annotations
from dataclasses import dataclass
from typing import List, NamedTuple, Optional
from relik.reader.pytorch_modules.hf.modeling_relik import RelikReaderSample
@dataclass
class Word:
    """
    A single word of a sentence, together with optional character offsets and
    optional linguistic annotations (lemma, POS tag, dependency relation, head).

    Only `text` and `index` are required; every other field defaults to `None`.

    # Parameters

    text : `str`
        The text representation.
    index : `int`
        The word offset in the sentence.
    start_char : `int`, optional (default = `None`)
        Presumably the character offset at which the word starts in the
        source text -- TODO confirm against the code that builds `Word`.
    end_char : `int`, optional (default = `None`)
        Presumably the character offset at which the word ends in the
        source text (inclusive/exclusive not visible here) -- TODO confirm.
    lemma : `str`, optional (default = `None`)
        The lemma of this word.
    pos : `str`, optional (default = `None`)
        The coarse-grained part of speech of this word.
    dep : `str`, optional (default = `None`)
        The dependency relation for this word.
    head : `int`, optional (default = `None`)
        Presumably the index of this word's syntactic head -- TODO confirm.
    """

    text: str
    index: int
    start_char: Optional[int] = None
    end_char: Optional[int] = None
    # preprocessing fields
    lemma: Optional[str] = None
    pos: Optional[str] = None
    dep: Optional[str] = None
    head: Optional[int] = None

    def __str__(self) -> str:
        """Return the raw text of the word."""
        return self.text

    def __repr__(self) -> str:
        """Return the same value as `__str__`.

        Because `__repr__` is defined in the class body, `@dataclass` does not
        generate its field-listing repr; a `Word` shows up as its bare text.
        """
        return self.__str__()
class EntitySpan(NamedTuple):
    """
    An immutable (start, end, label, text) record describing one labelled span.

    # Parameters

    start : `int`
        Start offset of the span. NOTE(review): whether this is a character
        or token offset is not visible here -- confirm with the producer.
    end : `int`
        End offset of the span. NOTE(review): inclusive vs. exclusive is not
        visible here -- confirm with the producer.
    label : `str`
        The label assigned to the span.
    text : `str`
        The text associated with the span.
    """

    start: int
    end: int
    label: str
    text: str
@dataclass
class RelikOutput:
    """
    Container bundling a text with the entity spans labelled on it.

    # Parameters

    text : `str`
        The text this output refers to.
    labels : `List[EntitySpan]`
        The labelled spans attached to `text`.
    windows : `List[RelikReaderSample]`, optional (default = `None`)
        Reader samples associated with this output, if any are kept.
        NOTE(review): how these relate to `text` is not visible here.
    """

    text: str
    labels: List[EntitySpan]
    windows: Optional[List[RelikReaderSample]] = None
|