Spaces:

ahdsoft
/

Persian-Topic-Modeling

Running

App Files Files Community

Persian-Topic-Modeling / app.py

AhdCompnay

Update app.py

8083f62 11 months ago

raw history blame contribute delete

No virus

3.71 kB

	import streamlit as st
	# import numpy as np
	import pandas as pd
	from topic_modeling import TopicModeling
	# Page-wide settings: browser-tab title, icon file and the wide layout.
	st.set_page_config(
	    page_title='تحلیل‌گر متن عهد',
	    page_icon='./ahd_logo.png',
	    layout='wide',
	)
	@st.cache_resource
	def get_model():
	    """Create the ``TopicModeling`` object used by the app.

	    The function is decorated with ``st.cache_resource``, so Streamlit
	    caches the returned object instead of rebuilding it on each rerun.

	    Returns:
	        A ``TopicModeling`` instance constructed with no arguments.
	    """
	    return TopicModeling()

	# Obtain the topic model through the cached factory.
	tp_model = get_model()

	# Page heading, rendered in the middle one of three columns.
	_left_col, center_col, _right_col = st.columns(3)
	with center_col:
	    st.title("تحلیل اسناد متنی")

	def _read_uploaded_table(uploaded_file):
	    """Load an uploaded spreadsheet into a DataFrame.

	    Args:
	        uploaded_file: Object returned by ``st.file_uploader``; its ``name``
	            attribute decides which pandas reader is used.

	    Returns:
	        A ``pandas.DataFrame`` for ``.xlsx`` and ``.csv`` uploads, or
	        ``None`` when the file name has any other extension.
	    """
	    # Compare case-insensitively so "DATA.CSV" is accepted like "data.csv".
	    lowered_name = uploaded_file.name.lower()
	    if lowered_name.endswith('.xlsx'):
	        return pd.read_excel(uploaded_file)
	    if lowered_name.endswith('.csv'):
	        return pd.read_csv(uploaded_file)
	    return None


	def _section_title(title):
	    """Render a section title in the last of three columns, then a spacer.

	    Args:
	        title: Text passed unchanged to ``st.title``.
	    """
	    _, _, title_col = st.columns(3)
	    with title_col:
	        st.title(title)
	    # Empty header used purely as vertical spacing before the content.
	    st.header("")


	# NOTE(review): the text this script was recovered from had lost its
	# indentation. Statement order is unchanged, but which calls sit inside
	# each ``with`` column block (e.g. charts rendered full-width rather than
	# inside the title column) is a reconstruction -- confirm against the
	# intended layout.

	# Upload a CSV or Excel file
	uploaded_file = st.file_uploader("آپلود فایل")
	if uploaded_file is not None:
	    df = _read_uploaded_table(uploaded_file)
	    if df is None:
	        # Show an error box and end this run instead of raising, which
	        # would surface as a traceback in the page.
	        st.error('Unsupported file format')
	        st.stop()

	    # Show first 10 rows of dataframe
	    st.write(df.head(10))

	    # Select columns to use for topic modeling and the share of rows to use
	    cols = st.multiselect("ستون‌های متنی موردنظر را انتخاب نمایید", df.columns)
	    ratio = st.slider('چند درصد از کل دادگان پردازش شود', min_value=0, max_value=100)
	    _, _, button_col, _, _ = st.columns(5)
	    with button_col:
	        done_button = st.button("پردازش دادگان")

	    if done_button:
	        if not cols:
	            # Message: "Please select at least one text column."
	            st.warning('لطفاً حداقل یک ستون متنی را انتخاب نمایید')
	            st.stop()

	        # Keep the selected columns, take the first ``ratio`` percent of the
	        # rows, then drop rows with missing values. Integer arithmetic
	        # avoids float rounding cutting off one row too many.
	        row_limit = len(df) * ratio // 100
	        selected = df[cols].head(row_limit).dropna()
	        if selected.empty:
	            # Happens e.g. when the slider is left at 0.
	            # Message: "With the current settings no rows are left to process."
	            st.warning('با تنظیمات فعلی هیچ سطری برای پردازش باقی نمانده است')
	            st.stop()

	        # Run topic modeling; progress and status go in the middle column.
	        _, status_col, _ = st.columns(3)
	        with status_col:
	            with st.spinner('در حال پردازش دادگان'):
	                docs = tp_model.add_data(selected)
	            st.success('پردازش دادگان با موفقیت به پایان رسید')
	            with st.spinner('در حال آموزش مدل'):
	                tp_model.fit(docs)
	            st.success('آموزش پایان یافت')

	        # Topic space plot
	        _section_title(" فضای تاپیک‌ها ")
	        st.plotly_chart(
	            tp_model.get_vis_topics(),
	            use_container_width=True,
	            theme="streamlit",  # optional: already the default theme
	        )

	        # Words per topic
	        _section_title(" کلمات هر تاپیک ")
	        st.plotly_chart(
	            tp_model.get_barchart(),
	            use_container_width=True,
	            theme="streamlit",  # optional: already the default theme
	        )

	        # Topic list
	        _section_title("لیست تاپیک‌ها")
	        topics_info = tp_model.get_topic_info()
	        st.write(topics_info)

	        # Word clouds. A commented-out alternative in the earlier code
	        # iterated get_wordcloud_by_topic for at most 15 topics.
	        _section_title(" ابر کلمات ")
	        for cloud_fig in tp_model.get_wordcloud():
	            st.header("")
	            st.markdown('topic:')
	            st.pyplot(cloud_fig)