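"""Streamlit demo for Persian keyphrase extraction (Ahd).

The app loads a KpeRanker model, accepts free text, extracts the top-N
keyphrases (optionally with named-entity recognition), and shows the results
as a styled table with CSV/TXT/JSON download buttons.
"""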
import streamlit as st
import numpy as np
from pandas import DataFrame
# from keybert import KeyBERT
# For Flair (KeyBERT)
# from flair.embeddings import TransformerDocumentEmbeddings
import seaborn as sns
# For download buttons
from functionforDownloadButtons import download_button
import os
import json
from kpe_ranker import KpeRanker
st.set_page_config(
    page_title="استخراج عبارات کلیدی عهد",  # "Ahd Keyphrase Extraction"
page_icon="🎈",
)
def _max_width_():
    """Widen the main Streamlit content area by injecting custom CSS."""
    max_width_str = "max-width: 1400px;"
st.markdown(
f"""
<style>
.reportview-container .main .block-container{{
{max_width_str}
}}
</style>
""",
unsafe_allow_html=True,
)
_max_width_()
c30, c31, c32 = st.columns([2.5, 1, 3])
with c30:
# st.image("logo.png", width=400)
    st.title("🔑 استخراج عبارات کلیدی")  # "Keyphrase Extraction"
st.header("")
with st.expander("ℹ️ - About this app", expanded=True):
    # About text (Persian): "Keyphrase extraction is a new product from Ahd
    # Company that, in the evaluations carried out, has shown higher accuracy
    # than its competitors."
    st.write(
        """
- استخراج عبارات کلیدی، محصولی نوین از شرکت عهد است که در ارزیابی‌های صورت‌گرفته، دقت بیشتری را نسبت به رقبا از خود نشان داده است.
"""
)
st.markdown("")
st.markdown("")
# st.markdown("## **...**")
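# Input form: a narrow settings column (c1, slider + NER checkbox) next to a
# wide text column (c2); nothing below runs until the form is submitted.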
with st.form(key="my_form"):
    ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])  # the `ce` columns are thin spacers
with c1:
# if ModelType == "Default (DistilBERT)":
# kw_model = KeyBERT(model=roberta)
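        # Cache the KpeRanker instance so the model is loaded only once per
        # process instead of on every rerun of the script.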
@st.cache_resource
def load_model():
return KpeRanker()
kpe_ranker_extractor = load_model()
# else:
# @st.cache(allow_output_mutation=True)
# def load_model():
# return KeyBERT("distilbert-base-nli-mean-tokens")
# kw_model = load_model()
        top_N = st.slider(
            "# تعداد",  # "Number" (how many keyphrases to return)
            min_value=1,
            max_value=30,
            value=10,
            help="You can choose the number of keywords/keyphrases to display. Between 1 and 30; the default is 10.",
        )
# min_Ngrams = st.number_input(
# "Minimum Ngram",
# min_value=1,
# max_value=4,
# help="""The minimum value for the ngram range.
# *Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
# To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
# # help="Minimum value for the keyphrase_ngram_range. keyphrase_ngram_range sets the length of the resulting keywords/keyphrases. To extract keyphrases, simply set keyphrase_ngram_range to (1, # 2) or higher depending on the number of words you would like in the resulting keyphrases.",
# )
# max_Ngrams = st.number_input(
# "Maximum Ngram",
# value=2,
# min_value=1,
# max_value=4,
# help="""The maximum value for the keyphrase_ngram_range.
# *Keyphrase_ngram_range* sets the length of the resulting keywords/keyphrases.
# To extract keyphrases, simply set *keyphrase_ngram_range* to (1, 2) or higher depending on the number of words you would like in the resulting keyphrases.""",
# )
# StopWordsCheckbox = st.checkbox(
# "Remove stop words",
# help="Tick this box to remove stop words from the document (currently English only)",
# )
        use_ner = st.checkbox(
            "NER",
            value=True,
            help="استفاده از شناسایی موجودیت‌های نام‌دار",  # "Use named-entity recognition"
        )
with c2:
        doc = st.text_area(
            "متن خود را وارد کنید",  # "Enter your text"
            height=510,
        )
        MAX_WORDS = 500
        import re

        res = len(re.findall(r"\w+", doc))
        if res > MAX_WORDS:
            st.warning(
                "⚠️ Your text contains "
                + str(res)
                + " words."
                + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
            )
            # Truncate to the first MAX_WORDS *words*; slicing doc[:MAX_WORDS]
            # would keep only the first 500 characters, contradicting the warning.
            words = list(re.finditer(r"\w+", doc))
            doc = doc[: words[MAX_WORDS - 1].end()]
        submit_button = st.form_submit_button(label="✨ پردازش")  # "Process"
if not submit_button:
st.stop()
#################################### get keyphrases #######################################################
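# KpeRanker.extract is assumed to return (keyphrase, score) pairs, sorted when
# return_sorted=True; the DataFrame built below relies on that two-column shape.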
keywords = kpe_ranker_extractor.extract(text=doc, count=top_N, using_ner=use_ner, return_sorted=True)
# print(keywords)
st.markdown("## **🎈 Check & download results **")
st.header("")
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
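# The same `keywords` object is passed to all three buttons; the
# download_button helper is expected to handle serialization for each file type.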
with c1:
CSVButton2 = download_button(keywords, "Data.csv", "📥 Download (.csv)")
with c2:
CSVButton2 = download_button(keywords, "Data.txt", "📥 Download (.txt)")
with c3:
CSVButton2 = download_button(keywords, "Data.json", "📥 Download (.json)")
st.header("")
df = (
DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
.sort_values(by="Relevancy", ascending=False)
.reset_index(drop=True)
)
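# Use a 1-based index so the displayed table starts at 1 rather than 0.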
df.index += 1
# Add styling
cmGreen = sns.light_palette("green", as_cmap=True)
cmRed = sns.light_palette("red", as_cmap=True)
df = df.style.background_gradient(
cmap=cmGreen,
subset=[
"Relevancy",
],
)
c1, c2, c3 = st.columns([1, 3, 1])
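# Render relevancy scores as percentages with one decimal place.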
format_dictionary = {
"Relevancy": "{:.1%}",
}
df = df.format(format_dictionary)
with c2:
st.table(df)