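"""Streamlit demo for Persian keyphrase extraction (Ahd).

The app loads a KpeRanker model, accepts free text, extracts the top-N
keyphrases (optionally with named-entity recognition), and shows the results
as a styled table with CSV/TXT/JSON download buttons.
"""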
import streamlit as st
import numpy as np
from pandas import DataFrame
# from keybert import KeyBERT
# For Flair (KeyBERT)
# from flair.embeddings import TransformerDocumentEmbeddings
import seaborn as sns
# For download buttons
from functionforDownloadButtons import download_button
import os
import json
from kpe_ranker import KpeRanker
st.set_page_config(
    page_title="استخراج عبارات کلیدی عهد",  # "Ahd Keyphrase Extraction"
page_icon="🎈",
)
def _max_width_():
    """Widen the main Streamlit content area by injecting custom CSS."""
    max_width_str = "max-width: 1400px;"
st.markdown(
f"""
<style>
.reportview-container .main .block-container{{
{max_width_str}
}}
</style>
""",
unsafe_allow_html=True,
)
_max_width_()
c30, c31, c32 = st.columns([2.5, 1, 3])
with c30:
# st.image("logo.png", width=400)
    st.title("🔑 استخراج عبارات کلیدی")  # "Keyphrase Extraction"
st.header("")
with st.expander("ℹ️ - About this app", expanded=True):
    # About text (Persian): "Keyphrase extraction is a new product from Ahd
    # Company that, in the evaluations carried out, has shown higher accuracy
    # than its competitors."
    st.write(
        """
- استخراج عبارات کلیدی، محصولی نوین از شرکت عهد است که در ارزیابی‌های صورت‌گرفته، دقت بیشتری را نسبت به رقبا از خود نشان داده است.
"""
)
st.markdown("")
st.markdown("")
# st.markdown("## **...**")
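# Input form: a narrow settings column (c1, slider + NER checkbox) next to a
# wide text column (c2); nothing below runs until the form is submitted.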
with st.form(key="my_form"):
    ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])  # the `ce` columns are thin spacers
with c1:
# if ModelType == "Default (DistilBERT)":
# kw_model = KeyBERT(model=roberta)
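        # Cache the KpeRanker instance so the model is loaded only once per
        # process instead of on every rerun of the script.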
@st.cache_resource
def load_model():
return KpeRanker()
kpe_ranker_extractor = load_model()
# else:
# @st.cache(allow_output_mutation=True)
# def load_model():
# return KeyBERT("distilbert-base-nli-mean-tokens")
# kw_model = load_model()
        top_N = st.slider(
            "# تعداد",  # "Number" (how many keyphrases to return)
            min_value=1,
            max_value=30,
            value=10,
            help="You can choose the number of keywords/keyphrases to display. Between 1 and 30; the default is 10.",
        )
# min_Ngrams = st.number_input(
# "Minimum Ngram",
# min_value=1,
# max_value=4,
# help="""The minimum value for the ngram range.
# *Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
# To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
# # help="Minimum value for the keyphrase_ngram_range. keyphrase_ngram_range sets the length of the resulting keywords/keyphrases. To extract keyphrases, simply set keyphrase_ngram_range to (1, # 2) or higher depending on the number of words you would like in the resulting keyphrases.",
# )
# max_Ngrams = st.number_input(
# "Maximum Ngram",
# value=2,
# min_value=1,
# max_value=4,
# help="""The maximum value for the keyphrase_ngram_range.
# *Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
# To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
# )
# StopWordsCheckbox = st.checkbox(
# "Remove stop words",
# help="Tick this box to remove stop words from the document (currently English only)",
# )
        use_ner = st.checkbox(
            "NER",
            value=True,
            help="استفاده از شناسایی موجودیت‌های نام‌دار",  # "Use named-entity recognition"
        )
with c2:
        doc = st.text_area(
            "متن خود را وارد کنید",  # "Enter your text"
            height=510,
        )
        MAX_WORDS = 500
        import re

        res = len(re.findall(r"\w+", doc))
        if res > MAX_WORDS:
            st.warning(
                "⚠️ Your text contains "
                + str(res)
                + " words."
                + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS *words*; slicing doc[:MAX_WORDS]
            # would keep only the first 500 characters, contradicting the warning.
            words = list(re.finditer(r"\w+", doc))
            doc = doc[: words[MAX_WORDS - 1].end()]
        submit_button = st.form_submit_button(label="✨ پردازش")  # "Process"
if not submit_button:
st.stop()
#################################### get keyphrases #######################################################
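# KpeRanker.extract is assumed to return (keyphrase, score) pairs, sorted when
# return_sorted=True; the DataFrame built below relies on that two-column shape.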
keywords = kpe_ranker_extractor.extract(text=doc, count=top_N, using_ner=use_ner, return_sorted=True)
# print(keywords)
st.markdown("## **🎈 Check & download results **")
st.header("")
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
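# The same `keywords` object is passed to all three buttons; the
# download_button helper is expected to handle serialization for each file type.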
with c1:
CSVButton2 = download_button(keywords, "Data.csv", "📥 Download (.csv)")
with c2:
CSVButton2 = download_button(keywords, "Data.txt", "📥 Download (.txt)")
with c3:
CSVButton2 = download_button(keywords, "Data.json", "📥 Download (.json)")
st.header("")
df = (
DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
.sort_values(by="Relevancy", ascending=False)
.reset_index(drop=True)
)
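# Use a 1-based index so the displayed table starts at 1 rather than 0.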
df.index += 1
# Add styling
cmGreen = sns.light_palette("green", as_cmap=True)
cmRed = sns.light_palette("red", as_cmap=True)
df = df.style.background_gradient(
cmap=cmGreen,
subset=[
"Relevancy",
],
)
c1, c2, c3 = st.columns([1, 3, 1])
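# Render relevancy scores as percentages with one decimal place.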
format_dictionary = {
"Relevancy": "{:.1%}",
}
df = df.format(format_dictionary)
with c2:
st.table(df)