File size: 3,199 Bytes
cb09dc9
 
b2fdf59
 
cb09dc9
 
 
 
 
 
 
 
 
 
 
 
b2fdf59
cb09dc9
 
b2fdf59
 
 
cb09dc9
 
b2fdf59
cb09dc9
 
 
85e8ccf
b2fdf59
cb09dc9
b2fdf59
5bead87
cb09dc9
 
 
 
85e8ccf
 
 
 
 
cb09dc9
 
 
85e8ccf
cb09dc9
 
 
85e8ccf
 
 
 
 
cb09dc9
 
 
85e8ccf
cb09dc9
85e8ccf
 
 
 
 
 
 
 
 
 
 
 
 
 
cb09dc9
 
 
 
 
85e8ccf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import logging

import gradio as gr

from util import textproc

# Patent section names. `PatentSummarizer.pipeline` looks for these exact
# strings in its `summaries_generated` argument.
summary_options = [
    "Abstract",
    "Background",
    "Claims",
]

# Model identifiers handed to `gr.Interface.load` by `init_models`.
model_names = [
    "huggingface/google/bigbird-pegasus-large-bigpatent",
    "huggingface/cnicu/t5-small-booksum",
    "huggingface/sshleifer/distilbart-cnn-6-6",
    "huggingface/google/pegasus-xsum",
]

def init_models():
    """Load every model listed in ``model_names``.

    Returns:
        dict: maps each entry of ``model_names`` to the object returned by
        ``gr.Interface.load`` for that entry.
    """
    return {model_id: gr.Interface.load(model_id) for model_id in model_names}

    
class PatentSummarizer():
    """Produce summaries of a patent's abstract, background and claims.

    The instance holds a mapping of model name -> callable; ``pipeline``
    picks one callable per section and feeds it that section's text.
    """

    # Shared logger so swallowed model failures still leave a trace.
    _log = logging.getLogger(__name__)

    def __init__(self, model_collection):
        """Store the model mapping.

        Args:
            model_collection: mapping indexed by model name whose values are
                called with a single text argument (see ``pipeline``).
        """
        self.model = model_collection
        # Not read anywhere in this class; kept because external code may
        # access it.
        self.max_word_input = 1000

    def pipeline(self, patent_information, summaries_generated, abstract_model,
                 background_model, claims_model, collate_claims, word_limit):
        """Summarize the requested sections of one patent.

        Args:
            patent_information: passed through to
                ``textproc.retrieve_parsed_doc`` to locate the patent.
            summaries_generated: container of section names; a section is
                summarized only if its name ("Abstract", "Background",
                "Claims") is in it.
            abstract_model: key into ``self.model`` used for the abstract.
            background_model: key into ``self.model`` used for the background.
            claims_model: key into ``self.model`` used for the claims.
            collate_claims: if truthy, all claims are joined with spaces,
                truncated and summarized in one call; otherwise each claim is
                summarized separately and the results are concatenated.
            word_limit: passed to ``textproc.get_word_index`` to decide where
                the input text is cut off.

        Returns:
            list: always three items, ``[abstract, background, claims]``.
            An item is ``None`` when its section was not requested, was not
            available, or its model call failed. On a pipeline-level failure
            the first item is an ``"[ERROR] ..."`` message and the other two
            are ``None``.
        """
        parsed_info = textproc.retrieve_parsed_doc(patent_information,
                                                   summaries_generated)
        if parsed_info is None:
            return ["[ERROR] Invalid patent information or timeout from scraping.", None, None]

        abstract, background, claims = parsed_info

        try:
            abstract_summary = None
            if "Abstract" in summaries_generated and abstract is not None:
                abstract_summary = self._summarize_text(
                    "Abstract", abstract, abstract_model, word_limit)

            background_summary = None
            if "Background" in summaries_generated and background is not None:
                background_summary = self._summarize_text(
                    "Background", background, background_model, word_limit)

            claims_summary = None
            if "Claims" in summaries_generated and claims is not None:
                claims_summary = self._summarize_claims(
                    claims, claims_model, collate_claims, word_limit)

            return [abstract_summary, background_summary, claims_summary]
        except Exception as e:
            # Same three-slot shape as every other return path. The previous
            # padding depended on len(summaries_generated), so the list
            # length varied and the handler itself could raise.
            return [f'[ERROR] {e}', None, None]

    def _summarize_text(self, label, text, model_name, word_limit):
        """Truncate ``text`` and summarize it; return ``None`` if the model fails."""
        # Truncation happens outside the try on purpose: an error here is
        # reported by ``pipeline`` as an "[ERROR]" result rather than hidden.
        # NOTE(review): presumably get_word_index returns a character offset
        # for the word limit - confirm in textproc.
        text = text[0: textproc.get_word_index(text, word_limit)]

        try:
            return textproc.post_process(self.model[model_name](text))
        except Exception:
            # Best effort: one failing section must not sink the others.
            self._log.exception("%s summarization failed", label)
            return None

    def _summarize_claims(self, claims, model_name, collate_claims, word_limit):
        """Summarize the claims jointly or one by one; return ``None`` on failure."""
        try:
            if collate_claims:
                collated = ' '.join(claims)
                collated = collated[0: textproc.get_word_index(collated, word_limit)]
                raw_summary = self.model[model_name](collated)
            else:
                # Per-claim outputs are concatenated without a separator.
                raw_summary = ''.join(
                    self.model[model_name](claim) for claim in claims)
            return textproc.post_process(raw_summary)
        except Exception:
            # Best effort: one failing section must not sink the others.
            self._log.exception("Claims summarization failed")
            return None