tommasobaldi committed
Commit
cb047cb
1 Parent(s): eda5d31

add summary cleaning function

Files changed (1)
  1. app.py +11 -10
app.py CHANGED
@@ -25,10 +25,19 @@ def main() -> None:
         tos_pipeline = pipeline(task="summarization",
                                 model="ML-unipi/bart-large-tos",
                                 tokenizer="ML-unipi/bart-large-tos",
-                                device=0
                                 )
         return tos_pipeline
 
+    def clean_summaries(text: str) -> list:
+        result = []
+        lines = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
+        for line in lines:
+            if line.find(".") != -1:
+                line = line.replace("..", ".")
+                result.append(line)
+        return result
+
+
     def display_summary(summary_sentences: list) -> None:
         st.subheader("Summary :male-detective:")
         for sentence in summary_sentences:
@@ -56,7 +65,6 @@ def main() -> None:
         cumulative_token_length = 0
 
         for sentence in sentences:
-            # token_list = [token for token in nltk.word_tokenize(sentence)]
            token_list = tokenizer(sentence, max_length=1024, truncation=True)
             token_length = len(token_list["input_ids"])
             if token_length > 10:
@@ -103,16 +111,9 @@ def main() -> None:
             split_token_length=1024
             )
         for sentence in sentences:
-            # token_list = [token for token in nltk.word_tokenize(sentence)]
-            # st.markdown(sentence)
-            # st.markdown(str(len(token_list)))
             output = pipe(sentence)
             summary = output[0]["summary_text"]
-
-            for line in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', summary):
-                if line.find(".") != -1:
-                    line = line.replace("..", ".")
-                    summary_sentences.append(line)
+            summary_sentences += clean_summaries(summary)
         display_summary(summary_sentences)
 
 
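For context, a minimal standalone sketch of what the new clean_summaries helper does, using a made-up summary string for illustration (the sample text is not from the app):

import re

def clean_summaries(text: str) -> list:
    # split the model output on sentence boundaries (same regex as in app.py)
    result = []
    lines = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
    for line in lines:
        if line.find(".") != -1:
            # collapse the double periods the summarizer sometimes emits
            line = line.replace("..", ".")
            result.append(line)
    return result

sample = "The provider may suspend accounts without notice.. Users are informed by email. fragment without a period"
print(clean_summaries(sample))
# ['The provider may suspend accounts without notice.', 'Users are informed by email.']

Factoring this out of the summarization loop lets each pipeline output be cleaned with a single call (summary_sentences += clean_summaries(summary)) and makes the splitting logic easier to test in isolation.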