mrfransis committed on
Commit
6c91d37
1 Parent(s): 6e3a821
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -3,6 +3,7 @@ from typing import AnyStr
3
  import nltk
4
  import streamlit as st
5
  from transformers import pipeline, AutoTokenizer
 
6
 
7
 
8
  def main() -> None:
@@ -23,7 +24,8 @@ def main() -> None:
23
  with st.spinner("Loading the model..."):
24
  tos_pipeline = pipeline(task="summarization",
25
  model="ML-unipi/bart-large-tos",
26
- tokenizer="ML-unipi/bart-large-tos"
 
27
  )
28
  return tos_pipeline
29
 
@@ -41,7 +43,7 @@ def main() -> None:
41
  return names
42
 
43
  def fetch_file_content(filename: str) -> AnyStr:
44
- with open(f"./samples/{filename.lower()}.txt", "r") as file:
45
  text = file.read()
46
  return text
47
 
@@ -106,8 +108,9 @@ def main() -> None:
106
  # st.markdown(str(len(token_list)))
107
  output = pipe(sentence)
108
  summary = output[0]["summary_text"]
109
- for line in summary.split(". "):
110
- if line != "":
 
111
  summary_sentences.append(line)
112
  display_summary(summary_sentences)
113
 
 
3
  import nltk
4
  import streamlit as st
5
  from transformers import pipeline, AutoTokenizer
6
+ import re
7
 
8
 
9
  def main() -> None:
 
24
  with st.spinner("Loading the model..."):
25
  tos_pipeline = pipeline(task="summarization",
26
  model="ML-unipi/bart-large-tos",
27
+ tokenizer="ML-unipi/bart-large-tos",
28
+ device=0
29
  )
30
  return tos_pipeline
31
 
 
43
  return names
44
 
45
  def fetch_file_content(filename: str) -> AnyStr:
46
+ with open(f"./samples/{filename.lower()}.txt", "r", encoding="utf-8") as file:
47
  text = file.read()
48
  return text
49
 
 
108
  # st.markdown(str(len(token_list)))
109
  output = pipe(sentence)
110
  summary = output[0]["summary_text"]
111
+
112
+ for line in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', summary):
113
+ if line.find(".") != -1:
114
  summary_sentences.append(line)
115
  display_summary(summary_sentences)
116