File size: 2,770 Bytes
7e3e85d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from typing import List, Union


class SummModel:
    """
    Base model class for SummerTime.

    Subclasses describe themselves by overriding the class-level flags below and
    are expected to override ``summarize``, ``assert_summ_input_type`` and
    ``show_capability``, all of which raise ``NotImplementedError`` here.
    """

    # static variables
    # Display name used in the generated description.
    model_name = "None"
    # True -> described as "extractive", False -> "abstractive".
    is_extractive = False
    # True -> described as "neural", False -> "non-neural".
    is_neural = False
    # True -> described as "query-based".
    is_query_based = False
    # True -> the description mentions "dialogue" textual data.
    is_dialogue_based = False
    # True -> the description mentions "multi-document" textual data.
    is_multi_document = False

    def __init__(
        self,
        trained_domain: Union[str, None] = None,
        max_input_length: Union[int, None] = None,
        max_output_length: Union[int, None] = None,
    ):
        """
        Store the generic model settings on the instance.

        :param trained_domain: stored as-is; presumably the domain of the data the
            model was trained on (confirm against subclasses)
        :param max_input_length: stored as-is; presumably an upper bound on the
            input size (unit not defined in this class)
        :param max_output_length: stored as-is; presumably an upper bound on the
            summary size (unit not defined in this class)
        """
        self.trained_domain = trained_domain
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length

    def summarize(
        self,
        corpus: Union[List[str], List[List[str]]],
        queries: Union[List[str], None] = None,
    ) -> List[str]:
        """
        All summarization models should have this function

        :param corpus: each string in the list is a source document to be summarized; if the model is multi-document or
            dialogue summarization model, then each instance contains a list of documents/utterances
        :param queries: a list of queries if this is a query-based model
        :return: a list of generated summaries
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "The base class for models shouldn't be instantiated!"
        )

    @classmethod
    def assert_summ_input_type(
        cls, corpus: Union[List[str], List[List[str]]], queries: Union[List[str], None]
    ):
        """
        Verifies that type of input corpus or queries for summarization align with the model type.

        :param corpus: the corpus that would be passed to ``summarize``
        :param queries: the queries that would be passed to ``summarize``, or None
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "The base class for models shouldn't be instantiated!"
        )

    @classmethod
    def show_capability(cls) -> None:
        """
        Use concise language to show the strength and weakness for each model. Try not to use NLP terminologies

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "The base class for models shouldn't be instantiated!"
        )

    @classmethod
    def generate_basic_description(cls) -> str:
        """
        Automatically generate the basic description string based on the attributes

        :return: one sentence naming the model and its query-based / extractive /
            neural nature, followed by a second sentence about supported data when
            the model is multi-document and/or dialogue based
        """
        extractive_abstractive = "extractive" if cls.is_extractive else "abstractive"
        neural = "neural" if cls.is_neural else "non-neural"

        # "extractive" and "abstractive" both start with a vowel, so the article
        # is "an" unless "query-based" comes first.
        if cls.is_query_based:
            kind = f"a query-based {extractive_abstractive}"
        else:
            kind = f"an {extractive_abstractive}"

        basic_description = (
            f"{cls.model_name} is {kind}, {neural} model for summarization."
        )

        # Collect only the data kinds that apply so that no empty fragment leaves
        # stray whitespace in the sentence.
        data_kinds = []
        if cls.is_multi_document:
            data_kinds.append("multi-document")
        if cls.is_dialogue_based:
            data_kinds.append("dialogue")

        if data_kinds:
            # Leading space separates this sentence from the previous one.
            basic_description += (
                f" It can handle {' and '.join(data_kinds)} textual data."
            )

        return basic_description