Update app.py
app.py CHANGED
@@ -198,6 +198,7 @@ def get_all_entities(text):
 
 def get_and_compare_entities(article_content,summary_output):
 
+
     all_entities_per_sentence = get_all_entities_per_sentence(article_content)
     entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
 
@@ -260,51 +261,6 @@ def highlight_entities(article_content,summary_output):
     return HTML_WRAPPER.format(soup)
 
 
-def render_dependency_parsing(text: dict):
-    html = render_sentence_custom(text, nlp)
-    html = html.replace("\n\n", "\n")
-    st.write(get_svg(html), unsafe_allow_html=True)
-
-
-def check_dependency(article: bool):
-    if article:
-        text = st.session_state.article_text
-        all_entities = get_all_entities_per_sentence(text)
-    else:
-        text = st.session_state.summary_output
-        all_entities = get_all_entities_per_sentence(text)
-    doc = nlp(text)
-    tok_l = doc.to_json()['tokens']
-    test_list_dict_output = []
-
-    sentences = list(doc.sents)
-    for i, sentence in enumerate(sentences):
-        start_id = sentence.start
-        end_id = sentence.end
-        for t in tok_l:
-            if t["id"] < start_id or t["id"] > end_id:
-                continue
-            head = tok_l[t['head']]
-            if t['dep'] == 'amod' or t['dep'] == "pobj":
-                object_here = text[t['start']:t['end']]
-                object_target = text[head['start']:head['end']]
-                if t['dep'] == "pobj" and str.lower(object_target) != "in":
-                    continue
-                # ONE NEEDS TO BE ENTITY
-                if object_here in all_entities[i]:
-                    identifier = object_here + t['dep'] + object_target
-                    test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
-                                                  "target_word_index": (t['head'] - sentence.start),
-                                                  "identifier": identifier, "sentence": str(sentence)})
-                elif object_target in all_entities[i]:
-                    identifier = object_here + t['dep'] + object_target
-                    test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
-                                                  "target_word_index": (t['head'] - sentence.start),
-                                                  "identifier": identifier, "sentence": str(sentence)})
-                else:
-                    continue
-    return test_list_dict_output
-
 
 def render_svg(svg_file):
     with open(svg_file, "r") as f:
@@ -378,6 +334,12 @@ def schleifer_model():
 
     summarizer = pipeline('summarization',model='sshleifer/distilbart-cnn-12-6')
     return summarizer
+
+@st.experimental_singleton(suppress_st_warning=True)
+def google_model():
+
+    summarizer = pipeline('summarization',model='google/pegasus-cnn_dailymail')
+    return summarizer
 
 @st.experimental_singleton(suppress_st_warning=True)
 def get_sentence_embedding_model():
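For reference, a minimal standalone sketch of how the newly added Pegasus summarizer can be exercised outside Streamlit. It assumes only that the transformers package is installed; the checkpoint name and the max_length/min_length call pattern come from the diff above, while the sample text and length values are purely illustrative.

from transformers import pipeline

# Same checkpoint the new google_model() helper loads in app.py.
summarizer = pipeline('summarization', model='google/pegasus-cnn_dailymail')

# Illustrative input; any article-length string works.
sample_article = (
    "The city council approved a new transit plan on Tuesday, adding three bus "
    "routes and extending service hours after months of public consultation."
)

# Mirrors the call in app.py: max_length/min_length bound the generated summary.
output = summarizer(sample_article, max_length=80, min_length=10)
print(' '.join(item['summary_text'] for item in output))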
@@ -399,7 +361,7 @@ nlp = get_spacy()
 st.title("Article Text and Link Extractive Summarizer π")
 
 model_type = st.sidebar.selectbox(
-    "Model type", options=["Facebook-Bart", "Sshleifer-DistilBart"]
+    "Model type", options=["Facebook-Bart", "Sshleifer-DistilBart","Google-Pegasus"]
 )
 
 max_len= st.sidebar.slider("Maximum length of the summarized text",min_value=80,max_value=500,step=10)
@@ -416,7 +378,8 @@ st.markdown(
 
 st.markdown("""
 - Facebook-Bart, trained on large [CNN and Daily Mail](https://huggingface.co/datasets/cnn_dailymail) news articles.
-- Sshleifer-Distilbart, which is a distilled (smaller) version of the large Bart model."""
+- Sshleifer-Distilbart, which is a distilled (smaller) version of the large Bart model.
+- Google Pegasus"""
 )
 
 st.markdown("""Please do note that the model will take longer to generate summaries for documents that are too long.""")
@@ -499,10 +462,23 @@ if summarize:
             summarizer_model = schleifer_model()
             summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
             summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
+
+    elif model_type == "Google-Pegasus":
+        if url_text:
+            text_to_summarize = cleaned_text
+        else:
+            text_to_summarize = cleaned_text
+
+        with st.spinner(
+                text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
+        ):
+            summarizer_model = google_model()
+            summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
+            summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
 
     with st.spinner("Calculating and matching entities, this takes a few seconds..."):
 
-        entity_match_html = highlight_entities(cleaned_text
+        entity_match_html = highlight_entities(cleaned_text,summarized_text)
         st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
         st.markdown("####")
 
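The three model branches inside if summarize: differ only in which cached pipeline they load before the shared summarize-and-join step. Below is a hedged sketch of that shared shape; the checkpoint mapping is an illustration (the committed code keeps explicit if/elif branches and loader functions), and the Bart checkpoint name is an assumption not shown in this diff, while the other two checkpoints appear above.

from transformers import pipeline

# Hypothetical mapping from the sidebar labels to checkpoints; app.py itself uses
# explicit if/elif branches with cached loader functions (schleifer_model, google_model).
CHECKPOINTS = {
    "Facebook-Bart": "facebook/bart-large-cnn",            # assumed checkpoint, not shown in this diff
    "Sshleifer-DistilBart": "sshleifer/distilbart-cnn-12-6",  # from the diff
    "Google-Pegasus": "google/pegasus-cnn_dailymail",       # added in this commit
}

def summarize_with(model_type, text_to_summarize, max_len, min_len):
    # Load the selected pipeline, then join its summary chunks exactly as app.py does.
    summarizer_model = pipeline('summarization', model=CHECKPOINTS[model_type])
    output = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
    return ' '.join(summ['summary_text'] for summ in output)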