Spaces:

riccorl
/

relik-entity-linking

Sleeping

File size: 1,665 Bytes

626eca0

from __future__ import annotations

from dataclasses import dataclass
from typing import List, NamedTuple, Optional

from relik.reader.pytorch_modules.hf.modeling_relik import RelikReaderSample


@dataclass
class Word:
    """
    A word representation that includes text, index in the sentence, POS tag, lemma,
    dependency relation, and similar information.

    # Parameters
    text : `str`, optional
        The text representation.
    index : `int`, optional
        The word offset in the sentence.
    lemma : `str`, optional
        The lemma of this word.
    pos : `str`, optional
        The coarse-grained part of speech of this word.
    dep : `str`, optional
        The dependency relation for this word.

    input_id : `int`, optional
        Integer representation of the word, used to pass it to a model.
    token_type_id : `int`, optional
        Token type id used by some transformers.
    attention_mask: `int`, optional
        Attention mask used by transformers, indicates to the model which tokens should
        be attended to, and which should not.
    """

    text: str
    index: int
    start_char: Optional[int] = None
    end_char: Optional[int] = None
    # preprocessing fields
    lemma: Optional[str] = None
    pos: Optional[str] = None
    dep: Optional[str] = None
    head: Optional[int] = None

    def __str__(self):
        return self.text

    def __repr__(self):
        return self.__str__()


class EntitySpan(NamedTuple):
    start: int
    end: int
    label: str
    text: str


@dataclass
class RelikOutput:
    text: str
    labels: List[EntitySpan]
    windows: Optional[List[RelikReaderSample]] = None