mrfransis committed on
Commit
6c91d37
1 Parent(s): 6e3a821
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -3,6 +3,7 @@ from typing import AnyStr
3
  import nltk
4
  import streamlit as st
5
  from transformers import pipeline, AutoTokenizer
 
6
 
7
 
8
  def main() -> None:
@@ -23,7 +24,8 @@ def main() -> None:
23
  with st.spinner("Loading the model..."):
24
  tos_pipeline = pipeline(task="summarization",
25
  model="ML-unipi/bart-large-tos",
26
- tokenizer="ML-unipi/bart-large-tos"
 
27
  )
28
  return tos_pipeline
29
 
@@ -41,7 +43,7 @@ def main() -> None:
41
  return names
42
 
43
  def fetch_file_content(filename: str) -> AnyStr:
44
- with open(f"./samples/{filename.lower()}.txt", "r") as file:
45
  text = file.read()
46
  return text
47
 
@@ -106,8 +108,9 @@ def main() -> None:
106
  # st.markdown(str(len(token_list)))
107
  output = pipe(sentence)
108
  summary = output[0]["summary_text"]
109
- for line in summary.split(". "):
110
- if line != "":
 
111
  summary_sentences.append(line)
112
  display_summary(summary_sentences)
113
 
 
3
  import nltk
4
  import streamlit as st
5
  from transformers import pipeline, AutoTokenizer
6
+ import re
7
 
8
 
9
  def main() -> None:
 
24
  with st.spinner("Loading the model..."):
25
  tos_pipeline = pipeline(task="summarization",
26
  model="ML-unipi/bart-large-tos",
27
+ tokenizer="ML-unipi/bart-large-tos",
28
+ device=0
29
  )
30
  return tos_pipeline
31
 
 
43
  return names
44
 
45
  def fetch_file_content(filename: str) -> AnyStr:
46
+ with open(f"./samples/{filename.lower()}.txt", "r", encoding="utf-8") as file:
47
  text = file.read()
48
  return text
49
 
 
108
  # st.markdown(str(len(token_list)))
109
  output = pipe(sentence)
110
  summary = output[0]["summary_text"]
111
+
112
+ for line in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', summary):
113
+ if line.find(".") != -1:
114
  summary_sentences.append(line)
115
  display_summary(summary_sentences)
116