from transformers import pipeline


class FillInSummary:
    """Organize summarization and the subsequent fill-in task.

    Holds two Hugging Face pipelines: one that summarizes a text and one
    that recognizes named entities so they can be blanked out of the
    summary.
    """

    def __init__(self):
        """Initialize the class with a summarizer and an NER model."""
        # Refer to
        # https://huggingface.co/docs/transformers/v4.18.0/en/main_classes/pipelines#transformers.SummarizationPipeline
        # for further information about configuration.
        self.summarizer = pipeline(
            "summarization", model="facebook/bart-large-cnn")

        # Using the default model:
        # https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english
        # 'simple' aggregation merges word pieces into whole entities, so
        # each result carries the 'start'/'end' offsets used for blanking.
        self.ner = pipeline("ner", aggregation_strategy='simple')

    def summarize(self, text: str, max_length: int = 400,
                  min_length: int = 100) -> str:
        """Summarize the given text.

        Parameters
        ----------
        text : str
            Text to be summarized. Must not exceed BART's maximal input
            length.
        max_length : int, optional
            Upper bound on the summary length, forwarded to the
            summarization pipeline. Defaults to 400.
        min_length : int, optional
            Lower bound on the summary length, forwarded to the
            summarization pipeline. Defaults to 100.

        Returns
        -------
        str
            Summary
        """
        # Refer to
        # https://huggingface.co/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig
        # for further configuration of the output.
        output: list = self.summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False)

        # The pipeline returns one result dict per input text; a single
        # text was passed in, so the summary is in the first entry.
        return output[0]['summary_text']

    def blank_ne_out(self, text: str) -> dict:
        """Blank out named entities.

        Transforms 'X did this.' to
        {
            'text': '_ did this.',
            'ner': [{
                'end': 1,
                'entity_group': 'ORG',
                'score': 0.73085225,
                'start': 0,
                'word': 'X'
            }]
        }

        Parameters
        ----------
        text : str
            Summarized text.

        Returns
        -------
        dict
            Contains the blanked out text under 'text' and the list of
            recognized named entities under 'ner'.
        """
        ner_list: list = self.ner(text)

        # Work on a list of characters so every entity is replaced in
        # place. Each entity is overwritten by the same number of
        # underscores, so the offsets of the remaining entities stay valid.
        chars: list = list(text)
        for entity in ner_list:
            start, end = entity['start'], entity['end']
            chars[start:end] = "_" * (end - start)

        return {
            'text': "".join(chars),
            'ner': ner_list
        }