Spaces:

jian-mo
/

E2E-QA-mining

Build error

App Files Files Community

jian-mo committed on Apr 2, 2022

Commit

a3d290e

1 Parent(s): 9397200

Create app.py

Browse files

Files changed (1) hide show

app.py +196 -0

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import streamlit as st
+import numpy as np
+from pandas import DataFrame
+from keybert import KeyBERT
+# For Flair (Keybert)
+from flair.embeddings import TransformerDocumentEmbeddings
+import seaborn as sns
+# For download buttons
+from functionforDownloadButtons import download_button
+import os
+import json
+from transformers import pipeline
+st.set_page_config(
+    page_title="E2E QA MINING",
+    page_icon="?",
+)
+def _max_width_():
+    max_width_str = f"max-width: 1400px;"
+    st.markdown(
+        f"""
+    <style>
+    .reportview-container .main .block-container{{
+        {max_width_str}
+    }}
+    </style>
+    """,
+        unsafe_allow_html=True,
+    )
+_max_width_()
+c30, c31, c32 = st.columns([2.5, 1, 3])
+with c30:
+    # st.image("logo.png", width=400)
+    st.title("🔑 E2E QA MINING")
+    st.header("")
+with st.expander("ℹ️ - About this app", expanded=True):
+    st.write(
+        """
+-   The *E2E QA MINING$ app helps you mine question-answer pairs from a given context.
+	    """
+    )
+    st.markdown("")
+st.markdown("")
+st.markdown("## **📌 Paste document **")
+with st.form(key="my_form"):
+    ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])
+    with c1:
+        kw_model = pipeline('text2text-generation', model='mojians/E2E-QA-mining')
+        top_N = st.slider(
+            "# of results",
+            min_value=1,
+            max_value=30,
+            value=10,
+            help="You can choose the number of keywords/keyphrases to display. Between 1 and 30, default number is 10.",
+        )
+        min_Ngrams = st.number_input(
+            "Minimum Ngram",
+            min_value=1,
+            max_value=4,
+            help="""The minimum value for the ngram range.
+*Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
+To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
+            # help="Minimum value for the keyphrase_ngram_range. keyphrase_ngram_range sets the length of the resulting keywords/keyphrases. To extract keyphrases, simply set keyphrase_ngram_range to (1, # 2) or higher depending on the number of words you would like in the resulting keyphrases.",
+        )
+        max_Ngrams = st.number_input(
+            "Maximum Ngram",
+            value=2,
+            min_value=1,
+            max_value=4,
+            help="""The maximum value for the keyphrase_ngram_range.
+*Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
+To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
+        )
+        StopWordsCheckbox = st.checkbox(
+            "Remove stop words",
+            help="Tick this box to remove stop words from the document (currently English only)",
+        )
+        use_MMR = st.checkbox(
+            "Use MMR",
+            value=True,
+            help="You can use Maximal Margin Relevance (MMR) to diversify the results. It creates keywords/keyphrases based on cosine similarity. Try high/low 'Diversity' settings below for interesting variations.",
+        )
+        Diversity = st.slider(
+            "Keyword diversity (MMR only)",
+            value=0.5,
+            min_value=0.0,
+            max_value=1.0,
+            step=0.1,
+            help="""The higher the setting, the more diverse the keywords.
+Note that the *Keyword diversity* slider only works if the *MMR* checkbox is ticked.
+""",
+        )
+    with c2:
+        doc = st.text_area(
+            "Paste your text below (max 500 words)",
+            height=510,
+        )
+        MAX_WORDS = 500
+        import re
+        res = len(re.findall(r"\w+", doc))
+        if res > MAX_WORDS:
+            st.warning(
+                "⚠️ Your text contains "
+                + str(res)
+                + " words."
+                + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
+            )
+            doc = doc[:MAX_WORDS]
+        submit_button = st.form_submit_button(label="✨ Get me the data!")
+    if use_MMR:
+        mmr = True
+    else:
+        mmr = False
+    if StopWordsCheckbox:
+        StopWords = "english"
+    else:
+        StopWords = None
+if not submit_button:
+    st.stop()
+if min_Ngrams > max_Ngrams:
+    st.warning("min_Ngrams can't be greater than max_Ngrams")
+    st.stop()
+keywords = kw_model("context:"+doc+ "generate questions and answers:", do_sample=True, min_length=50,max_length=300)
+st.markdown("## **🎈 Check & download results **")
+st.header("")
+cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
+with c1:
+    CSVButton2 = download_button(keywords, "Data.csv", "📥 Download (.csv)")
+with c2:
+    CSVButton2 = download_button(keywords, "Data.txt", "📥 Download (.txt)")
+with c3:
+    CSVButton2 = download_button(keywords, "Data.json", "📥 Download (.json)")
+st.header("")
+df = (
+    DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
+        .sort_values(by="Relevancy", ascending=False)
+        .reset_index(drop=True)
+)
+df.index += 1
+# Add styling
+cmGreen = sns.light_palette("green", as_cmap=True)
+cmRed = sns.light_palette("red", as_cmap=True)
+df = df.style.background_gradient(
+    cmap=cmGreen,
+    subset=[
+        "Relevancy",
+    ],
+)
+c1, c2, c3 = st.columns([1, 3, 1])
+format_dictionary = {
+    "Relevancy": "{:.1%}",
+}
+df = df.format(format_dictionary)
+with c2:
+    st.table(df)