Christian Koch committed on
Commit
8a6bb0b
1 Parent(s): 0177922

add mission function

Browse files
Files changed (2) hide show
  1. .idea/.gitignore +8 -0
  2. fill_in_summary.py +66 -0
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Datasource local storage ignored files
5
+ /dataSources/
6
+ /dataSources.local.xml
7
+ # Editor-based HTTP Client requests
8
+ /httpRequests/
fill_in_summary.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class FillInSummary:
    """Summarize a text and turn the summary into a fill-in-the-blank task.

    The class holds two Hugging Face pipelines: a summarizer and a named
    entity recognizer. Entities found in a summary are blanked out with
    underscores so they can be asked for in a subsequent fill-in task.
    """

    def __init__(self):
        """Initialize the summarization and NER pipelines."""
        # NOTE(review): `pipeline` is presumably `transformers.pipeline`; no
        # import of it is visible in this file, so it must be brought into
        # scope at module level (`from transformers import pipeline`).
        #
        # Refer to https://huggingface.co/docs/transformers/v4.18.0/en/main_classes/pipelines#transformers.SummarizationPipeline
        # for further information about configuration.
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # Using default model: https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english
        self.ner = pipeline("ner", aggregation_strategy='simple')

    def summarize(self, text: str) -> str:
        """Summarize the given text.

        Parameters
        ----------
        text : str
            Text to be summarized. Must not exceed BART's maximal input
            length.

        Returns
        -------
        str
            The `summary_text` of the first result returned by the
            summarizer.
        """
        # Refer to https://huggingface.co/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig
        # for further configuration of the arguments passed below.
        output: list = self.summarizer(
            text,
            max_length=400,
            min_length=100,
            do_sample=False)
        return output[0]['summary_text']

    def blank_ne_out(self, text: str) -> dict:
        """Blank out named entities.

        Transforms 'X did this.' to {
            'text': '_ did this.',
            'ner': [{
                'end': 1,
                'entity_group': 'ORG',
                'score': 0.73085225,
                'start': 0,
                'word': 'X'
            }]}

        Parameters
        ----------
        text : str
            Summarized text.

        Returns
        -------
        dict
            Contains the blanked out text under 'text' and the list of
            recognized named entities, as returned by the NER pipeline,
            under 'ner'.
        """
        ner_list: list = self.ner(text)
        output_str: str = text
        for entity in ner_list:
            start, end = entity['start'], entity['end']
            # Replace the entity with the same number of underscores, so the
            # offsets of the remaining entities stay valid for output_str.
            output_str = output_str[:start] + "_" * (end - start) + output_str[end:]
        return {
            'text': output_str,
            'ner': ner_list
        }