Spaces:

ahdsoft
/

persian-keyphrase-extraction

Runtime error

File size: 5,515 Bytes

import streamlit as st
import numpy as np
from pandas import DataFrame
# from keybert import KeyBERT
# For Flair (Keybert)
# from flair.embeddings import TransformerDocumentEmbeddings
import seaborn as sns
# For download buttons
from functionforDownloadButtons import download_button
import os
import json

from kpe_ranker import KpeRanker

# Browser-tab metadata for the app. Per the Streamlit docs, set_page_config
# has to be the first Streamlit command executed on the page.
_PAGE_SETTINGS = {
    "page_title": "استخراج عبارات کلیدی عهد",
    "page_icon": "🎈",
}
st.set_page_config(**_PAGE_SETTINGS)


def _max_width_(max_width_px: int = 1400) -> None:
    """Inject a CSS rule that caps the width of the main content container.

    Args:
        max_width_px: Maximum container width in pixels. Defaults to 1400,
            the value that used to be hard-coded, so existing calls without
            arguments emit the same rule as before.

    NOTE(review): the ``.reportview-container`` selector presumably targets an
    older Streamlit DOM layout -- confirm it still matches the deployed version.
    """
    max_width_str = f"max-width: {max_width_px}px;"
    # Doubled braces are literal CSS braces inside the f-string.
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>
    """,
        unsafe_allow_html=True,
    )


# Apply the page-width CSS before any visible element is drawn.
_max_width_()

# Three-column header row; only the first column receives content.
header_columns = st.columns([2.5, 1, 3])
c30, c31, c32 = header_columns

with c30:
    # st.image("logo.png", width=400)
    st.title("🔑 استخراج عبارات کلیدی")
    st.header("")  # presumably an empty header used as a vertical spacer

# Markdown body of the "about" box. The leading/trailing whitespace of the
# original triple-quoted literal is spelled out with explicit escapes.
about_text = (
    "     \n"
    "-   استخراج عبارات کلیدی، محصولی نوین از شرکت عهد است که در ارزیابی‌های صورت‌گرفته، دقت بیشتری را نسبت به رقبا از خود نشان داده است.\n"
    "\t    "
)

with st.expander("ℹ️ - About this app", expanded=True):
    st.write(about_text)
    st.markdown("")

st.markdown("")
# st.markdown("## **...**")
# Input form: extraction settings on the left (c1), the text to analyse on the
# right (c2). Widget values are read after the form's submit button is pressed.
with st.form(key="my_form"):

    # The 0.07-wide columns are thin gutters around the two content columns.
    ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])

    with c1:

        @st.cache_resource
        def load_model():
            """Create the keyphrase ranker; st.cache_resource reuses it across reruns."""
            return KpeRanker()

        kpe_ranker_extractor = load_model()

        # Number of keyphrases requested from the extractor.
        top_N = st.slider(
            "# تعداد",
            min_value=1,
            max_value=30,
            value=10,
            help="You can choose the number of keywords/keyphrases to display. Between 1 and 30, default number is 10.",
        )

        # Forwarded to the extractor as its `using_ner` flag.
        use_ner = st.checkbox(
            "NER",
            value=True,
            help="استفاده از شناسایی موجودیت‌های نام‌دار",
        )

    with c2:
        doc = st.text_area(
            "متن خود را وارد کنید",
            height=510,
        )

        MAX_WORDS = 500
        import re

        # End offset of every \w+ run, so the text can be cut right after the
        # MAX_WORDS-th word instead of at a character position.
        word_ends = [match.end() for match in re.finditer(r"\w+", doc)]
        res = len(word_ends)
        if res > MAX_WORDS:
            st.warning(
                f"⚠️ Your text contains {res} words."
                f" Only the first {MAX_WORDS} words will be reviewed."
                " Stay tuned as increased allowance is coming! 😊"
            )

            # The previous `doc[:MAX_WORDS]` kept the first 500 *characters*,
            # which contradicted both the word count and the warning text.
            doc = doc[: word_ends[MAX_WORDS - 1]]

        submit_button = st.form_submit_button(label="✨ پردازش")


# Nothing below this point runs until the form has been submitted.
if not submit_button:
    st.stop()

# An empty (or whitespace-only) text area has nothing to rank, so tell the
# user and stop instead of handing an empty string to the extractor.
if not doc.strip():
    st.warning("⚠️ Please enter some text before submitting.")
    st.stop()

#################################### get keyphrases #######################################################

# presumably returns (keyphrase, relevancy) pairs -- the code further down loads
# the result into a two-column DataFrame; confirm against KpeRanker.extract.
keywords = kpe_ranker_extractor.extract(
    text=doc,
    count=top_N,
    using_ner=use_ner,
    return_sorted=True,
)
# print(keywords)
# The closing "**" must touch the text: with a space before it ("results **")
# Markdown does not treat the span as bold and prints the asterisks literally.
st.markdown("## **🎈 Check & download results**")

st.header("")

# Outer columns (cs, cLast) are margins; the three inner ones hold the buttons.
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])

# One download button per export format, each in its own column.
download_targets = (
    (c1, "Data.csv", "📥 Download (.csv)"),
    (c2, "Data.txt", "📥 Download (.txt)"),
    (c3, "Data.json", "📥 Download (.json)"),
)
for column, filename, label in download_targets:
    with column:
        download_button(keywords, filename, label)

st.header("")

# Tabulate the extracted keyphrases, highest relevancy first, numbered from 1.
results = (
    DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
    .sort_values(by="Relevancy", ascending=False)
    .reset_index(drop=True)
)
results.index += 1

# Styling: green gradient on the relevancy column, values shown as percentages.
# The red palette that used to be built here was never used and is dropped.
cmGreen = sns.light_palette("green", as_cmap=True)
df = (
    results.style
    .background_gradient(cmap=cmGreen, subset=["Relevancy"])
    .format({"Relevancy": "{:.1%}"})
)

# Centre the table by rendering it in the wide middle column.
c1, c2, c3 = st.columns([1, 3, 1])

with c2:
    st.table(df)