Spaces:
Runtime error
Runtime error
import streamlit as st | |
import numpy as np | |
from pandas import DataFrame | |
# from keybert import KeyBERT | |
# For Flair (Keybert) | |
# from flair.embeddings import TransformerDocumentEmbeddings | |
import seaborn as sns | |
# For download buttons | |
from functionforDownloadButtons import download_button | |
import os | |
import json | |
from kpe_ranker import KpeRanker | |
# Browser-tab title and icon for the app, passed to Streamlit in one call.
_page_settings = {
    "page_title": "استخراج عبارات کلیدی عهد",
    "page_icon": "🎈",
}
st.set_page_config(**_page_settings)
def _max_width_(max_width_px=1400):
    """Inject a CSS rule that caps the width of the main content container.

    Args:
        max_width_px: Maximum container width in pixels. Defaults to 1400,
            the value that used to be hard-coded, so existing zero-argument
            calls behave as before.
    """
    # NOTE(review): the selector relies on the `.reportview-container` class
    # name; confirm it still matches the markup of the deployed Streamlit
    # version, otherwise the rule silently has no effect.
    css_lines = [
        "",
        "<style>",
        ".reportview-container .main .block-container{",
        f"max-width: {max_width_px}px;",
        "}",
        "</style>",
        "",
    ]
    # unsafe_allow_html is required so the <style> tag is emitted verbatim
    # instead of being escaped.
    st.markdown("\n".join(css_lines), unsafe_allow_html=True)
# Apply the page-width CSS before any visible widget is rendered.
_max_width_()

# Three-column header row; only the first (widest-left) column holds content.
title_col, _, _ = st.columns([2.5, 1, 3])
with title_col:
    # st.image("logo.png", width=400)
    st.title("🔑 استخراج عبارات کلیدی")
    st.header("")  # empty header used as a vertical spacer

# Markdown body of the "about" panel (a single bullet point).
about_text = (
    "\n"
    "- استخراج عبارات کلیدی، محصولی نوین از شرکت عهد است که در ارزیابیهای صورتگرفته، دقت بیشتری را نسبت به رقبا از خود نشان داده است.\n"
)

with st.expander("ℹ️ - About this app", expanded=True):
    st.write(about_text)
    st.markdown("")  # spacer inside the panel

st.markdown("")  # spacer below the panel
# st.markdown("## **...**")
import re  # kept next to its only user; the file's import header is outside this section

# Maximum number of words of the submitted text that is passed on for extraction.
MAX_WORDS = 500

# Same word definition the word counter has always used: runs of \w characters.
_WORD_RE = re.compile(r"\w+")


def _limit_words(text, max_words):
    """Count the words in *text* and cut it after its first *max_words* words.

    Args:
        text: The raw text to inspect.
        max_words: Number of leading words to keep.

    Returns:
        A ``(word_count, limited_text)`` tuple. ``limited_text`` is ``text``
        itself when it holds at most ``max_words`` words; otherwise it is the
        prefix of ``text`` ending right after the ``max_words``-th word, with
        the original spacing and punctuation of that prefix preserved.
    """
    word_ends = [match.end() for match in _WORD_RE.finditer(text)]
    word_count = len(word_ends)
    if word_count <= max_words:
        return word_count, text
    if max_words <= 0:
        return word_count, ""
    return word_count, text[: word_ends[max_words - 1]]


def _cache_model_loader(loader):
    """Wrap *loader* so that its result is built once and reused across reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching the extractor would be re-created each time. Uses
    ``st.cache_resource`` when the installed Streamlit provides it and falls
    back to the older ``st.cache(allow_output_mutation=True)`` otherwise.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(loader)
    return st.cache(allow_output_mutation=True)(loader)


@_cache_model_loader
def load_model():
    """Create the keyphrase extractor used by the app."""
    return KpeRanker()


with st.form(key="my_form"):
    # Thin spacer columns surround the settings column (c1) and the text column (c2).
    _, c1, _, c2, _ = st.columns([0.07, 1, 0.07, 5, 0.07])

    with c1:
        kpe_ranker_extractor = load_model()

        top_N = st.slider(
            "# تعداد",
            min_value=1,
            max_value=30,
            value=10,
            help="You can choose the number of keywords/keyphrases to display. Between 1 and 30, default number is 10.",
        )

        # The n-gram range inputs and the stop-word checkbox that used to sit
        # here (KeyBERT-specific options) are disabled for this extractor.

        use_ner = st.checkbox(
            "NER",
            value=True,
            help="استفاده از شناسایی موجودیتهای نامدار",
        )

    with c2:
        doc = st.text_area(
            "متن خود را وارد کنید",
            height=510,
        )

        word_count, limited_doc = _limit_words(doc, MAX_WORDS)
        if word_count > MAX_WORDS:
            st.warning(
                f"⚠️ Your text contains {word_count} words."
                f" Only the first {MAX_WORDS} words will be reviewed."
                " Stay tuned as increased allowance is coming! 😊"
            )
            # Truncate on a word boundary; slicing doc[:MAX_WORDS] would keep
            # only the first MAX_WORDS characters instead.
            doc = limited_doc

        submit_button = st.form_submit_button(label="✨ پردازش")

# Nothing below runs until the form has been submitted.
if not submit_button:
    st.stop()

# A blank submission has nothing to extract from; tell the user instead of
# handing an empty string to the extractor.
if not doc.strip():
    st.warning("⚠️ Please enter some text to process.")
    st.stop()
#################################### get keyphrases #######################################################
keywords = kpe_ranker_extractor.extract(
    text=doc,
    count=top_N,
    using_ner=use_ner,
    return_sorted=True,
)

# No space before the closing ** — with a trailing space the bold marker is
# not recognised and the asterisks are shown literally.
st.markdown("## **🎈 Check & download results**")
st.header("")  # vertical spacer

# One download control per export format, centred between two spacer columns.
# NOTE(review): download_button is a project helper; presumably it renders
# the control itself, so its return value is not used here — confirm.
download_targets = (
    ("Data.csv", "📥 Download (.csv)"),
    ("Data.txt", "📥 Download (.txt)"),
    ("Data.json", "📥 Download (.json)"),
)
_, c1, c2, c3, _ = st.columns([2, 1.5, 1.5, 1.5, 2])
for column, (file_name, button_label) in zip((c1, c2, c3), download_targets):
    with column:
        download_button(keywords, file_name, button_label)

st.header("")  # vertical spacer

# assumes `keywords` is a sequence of (phrase, score) pairs — TODO confirm
# against KpeRanker.extract.
df = (
    DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
    .sort_values(by="Relevancy", ascending=False)
    .reset_index(drop=True)
)
df.index += 1  # show ranks starting at 1 instead of 0

# Shade the score column green by value and print scores as percentages.
green_cmap = sns.light_palette("green", as_cmap=True)
styled_table = df.style.background_gradient(
    cmap=green_cmap,
    subset=["Relevancy"],
).format({"Relevancy": "{:.1%}"})

# Render the table in the wide middle column.
_, c2, _ = st.columns([1, 3, 1])
with c2:
    st.table(styled_table)