AhdCompnay's picture
Update app.py
8083f62
import streamlit as st
# import numpy as np
import pandas as pd
from topic_modeling import TopicModeling
# Page-level settings: browser tab title, icon file and wide layout.
st.set_page_config(
    page_title='تحلیل‌گر متن عهد',
    page_icon='./ahd_logo.png',
    layout='wide',
)
@st.cache_resource
def get_model():
    """Create the TopicModeling instance used by the app.

    Decorated with ``st.cache_resource`` so that Streamlit can hand back
    the same object on later calls instead of constructing a new one.
    """
    return TopicModeling()
# Topic-modeling instance used by the processing steps further down.
tp_model = get_model()

# Page heading, placed in the middle of three equal-width columns.
col1, col2, col3 = st.columns(3)
col2.title("تحلیل اسناد متنی")

# Upload widget for the input table; stays None until a file is chosen.
uploaded_file = st.file_uploader("آپلود فایل")
def _read_uploaded_table(uploaded_file):
    """Load the uploaded file into a DataFrame.

    The reader is chosen from the file-name extension, compared
    case-insensitively: ``.xlsx`` uses ``pd.read_excel`` and ``.csv``
    uses ``pd.read_csv``.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; only
            its ``name`` attribute and file-like content are used.

    Returns:
        The parsed DataFrame, or ``None`` when the extension is neither
        ``.xlsx`` nor ``.csv``.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return None


def _section_title(text):
    """Render a section title in the last of three equal-width columns.

    An empty header is emitted right after the title; the page appears to
    use it as a vertical spacer before the section content.
    """
    # NOTE(review): the title sits in the right-most column and the section
    # content that follows is rendered full-width -- confirm intended layout.
    _, _, right_col = st.columns(3)
    with right_col:
        st.title(text)
    st.header("")


if uploaded_file is not None:
    df = _read_uploaded_table(uploaded_file)
    if df is None:
        # Report the problem inside the page and end this run, rather than
        # raising and showing the user a traceback.
        st.error('Unsupported file format')
        st.stop()

    # Preview the first 10 rows of the uploaded table.
    st.write(df.head(10))

    # Columns to feed into topic modeling, and the share of rows to process.
    cols = st.multiselect("ستون‌های متنی موردنظر را انتخاب نمایید", df.columns)
    # Without an explicit value the slider starts at min_value (0), which
    # would select no rows at all; start at 100 percent instead.
    ratio = st.slider(
        'چند درصد از کل دادگان پردازش شود',
        min_value=0,
        max_value=100,
        value=100,
    )

    # Centre the action button in the middle of five columns.
    col1, col2, col3, col4, col5 = st.columns(5)
    with col3:
        done_button = st.button("پردازش دادگان")

    if done_button:
        if not cols:
            # Nothing selected: there is no text to model.
            st.warning('لطفاً حداقل یک ستون متنی را انتخاب نمایید')
            st.stop()

        # Keep the selected columns, take the requested leading share of
        # rows, then drop rows with missing values. Integer arithmetic
        # avoids float truncation (e.g. int(100 * 0.29) == 28).
        df = df[cols]
        df = df.head(len(df) * ratio // 100)
        df = df.dropna()

        if df.empty:
            # Zero percent selected, or every remaining row had a gap.
            st.warning('داده‌ای برای پردازش باقی نمانده است')
            st.stop()

        # Progress and status messages go in the middle column.
        col1, col2, col3 = st.columns(3)
        with col2:
            with st.spinner('در حال پردازش دادگان'):
                docs = tp_model.add_data(df)
            st.success('پردازش دادگان با موفقیت به پایان رسید')

            with st.spinner('در حال آموزش مدل'):
                tp_model.fit(docs)
            st.success('آموزش پایان یافت')

        # Topic space figure.
        _section_title(" فضای تاپیک‌ها ")
        st.plotly_chart(
            tp_model.get_vis_topics(),
            use_container_width=True,
            theme="streamlit",  # optional: this is already the default
        )

        # Per-topic words bar chart.
        _section_title(" کلمات هر تاپیک ")
        st.plotly_chart(
            tp_model.get_barchart(),
            use_container_width=True,
            theme="streamlit",  # optional: this is already the default
        )

        # Topic list table.
        _section_title("لیست تاپیک‌ها")
        st.write(tp_model.get_topic_info())

        # Word clouds: one figure per item returned by get_wordcloud().
        _section_title(" ابر کلمات ")
        for fig in tp_model.get_wordcloud():
            st.header("")
            st.markdown('topic:')
            st.pyplot(fig)