Spaces:
Running
Running
import streamlit as st | |
# import numpy as np | |
import pandas as pd | |
from topic_modeling import TopicModeling | |
# Configure the browser tab (title and icon) and switch to the wide layout.
st.set_page_config(
    page_title='تحلیلگر متن عهد',
    page_icon='./ahd_logo.png',
    layout='wide',
)
def get_model():
    """Create and return a new ``TopicModeling`` instance."""
    return TopicModeling()
# Topic-modeling object used by the processing steps further down the page.
tp_model = get_model()

# Page title, rendered in the middle one of three columns.
col1, col2, col3 = st.columns(3)
col2.title("تحلیل اسناد متنی")
# NOTE(review): the indentation of this script was lost in the copy that was
# reviewed; the nesting below (what is rendered inside each column/spinner)
# is a reconstruction -- confirm it against the intended page layout.


def _read_uploaded_table(uploaded):
    """Load an uploaded ``.xlsx`` or ``.csv`` file into a DataFrame.

    The extension is compared case-insensitively, so ``DATA.CSV`` is accepted.

    Args:
        uploaded: Object returned by ``st.file_uploader``; its ``name``
            attribute decides which pandas reader is used.

    Returns:
        The parsed DataFrame, or ``None`` when the file name ends with
        neither ``.xlsx`` nor ``.csv``.
    """
    lowered_name = uploaded.name.lower()
    if lowered_name.endswith('.xlsx'):
        return pd.read_excel(uploaded)
    if lowered_name.endswith('.csv'):
        return pd.read_csv(uploaded)
    return None


def _section_title(text):
    """Render *text* as a title in the right-most of three columns."""
    _, _, right = st.columns(3)
    with right:
        st.title(text)
        st.header("")  # empty header, used only as vertical spacing


# Upload an Excel (.xlsx) or CSV file
uploaded_file = st.file_uploader("آپلود فایل")
if uploaded_file is not None:
    df = _read_uploaded_table(uploaded_file)
    if df is None:
        # Report the problem on the page and end this run, rather than
        # raising and showing the user a traceback.
        st.error('Unsupported file format')
        st.stop()

    # Show first 10 rows of dataframe
    st.write(df.head(10))

    # Select columns to use for topic modeling
    cols = st.multiselect("ستونهای متنی موردنظر را انتخاب نمایید", df.columns)
    # Percentage of the rows to process; the slider starts at its minimum (0).
    ratio = st.slider('چند درصد از کل دادگان پردازش شود', min_value=0, max_value=100)

    col1, col2, col3, col4, col5 = st.columns(5)
    with col3:
        done_button = st.button("پردازش دادگان")

    if done_button:
        if not cols:
            # Message: "Please select at least one text column".
            st.warning('لطفاً حداقل یک ستون متنی را انتخاب نمایید')
            st.stop()

        # Keep only the selected columns, then the leading `ratio` percent of
        # rows. Integer arithmetic avoids float truncation (100 * 0.29 is
        # 28.999..., which int() would turn into 28 instead of 29).
        df = df[cols]
        df = df.head(len(df) * ratio // 100)
        df = df.dropna()

        if df.empty:
            # Message: "No rows are left to process" (ratio of 0, or every
            # remaining row contained a missing value).
            st.warning('هیچ سطری برای پردازش باقی نمانده است')
            st.stop()

        # Run topic modeling: prepare the documents, then fit the model.
        col1, col2, col3 = st.columns(3)
        with col2:
            with st.spinner('در حال پردازش دادگان'):
                docs = tp_model.add_data(df)
            st.success('پردازش دادگان با موفقیت به پایان رسید')

            with st.spinner('در حال آموزش مدل'):
                tp_model.fit(docs)
            st.success('آموزش پایان یافت')

        # Plotly sections: topic-space view, then per-topic word bar chart.
        for section_text, make_figure in (
            (" فضای تاپیکها ", tp_model.get_vis_topics),
            (" کلمات هر تاپیک ", tp_model.get_barchart),
        ):
            _section_title(section_text)
            st.plotly_chart(
                make_figure(),
                use_container_width=True,
                theme="streamlit",  # optional, this is already the default
            )

        # Table of topics.
        _section_title("لیست تاپیکها")
        st.write(tp_model.get_topic_info())

        # Word clouds: one figure per item returned by get_wordcloud().
        _section_title(" ابر کلمات ")
        for fig in tp_model.get_wordcloud():
            st.header("")
            st.markdown('topic:')
            st.pyplot(fig)