"""Streamlit front-end for topic modeling over uploaded text documents.

Flow: upload a CSV/XLSX file, choose the text columns and the percentage of
rows to process, then fit the topic model and render its visualizations.
"""
import streamlit as st
import pandas as pd
from topic_modeling import TopicModeling

st.set_page_config(page_title='تحلیل‌گر متن عهد', page_icon='./ahd_logo.png', layout='wide')


@st.cache_resource
def get_model():
    """Return the TopicModeling instance, cached by Streamlit across reruns."""
    return TopicModeling()


def _load_dataframe(uploaded_file):
    """Read an uploaded .xlsx or .csv file into a DataFrame.

    The extension check is case-insensitive. Returns None when the
    extension is neither .xlsx nor .csv.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return None


def _section_title(text):
    """Render a section title in the last of three equal columns."""
    *_, right = st.columns(3)
    with right:
        st.title(text)
        st.header("")


tp_model = get_model()

_, center, _ = st.columns(3)
with center:
    st.title("تحلیل اسناد متنی")

# Upload the input file (CSV or XLSX).
uploaded_file = st.file_uploader("آپلود فایل")

if uploaded_file is not None:
    df = _load_dataframe(uploaded_file)
    if df is None:
        # Show a clean error instead of an uncaught exception traceback.
        st.error('Unsupported file format')
        st.stop()

    # Preview the first 10 rows of the uploaded data.
    st.write(df.head(10))

    # Select the columns to use for topic modeling.
    cols = st.multiselect("ستون‌های متنی موردنظر را انتخاب نمایید", df.columns)
    ratio = st.slider('چند درصد از کل دادگان پردازش شود', min_value=0, max_value=100)

    button_columns = st.columns(5)
    with button_columns[2]:
        done_button = st.button("پردازش دادگان")

    if done_button:
        if not cols:
            st.warning('لطفاً حداقل یک ستون متنی را انتخاب نمایید')
            st.stop()

        # Keep only the selected columns, take the leading `ratio` percent of
        # the rows, then drop rows with missing values.
        df = df[cols]
        df = df.head(int(len(df) * (ratio / 100)))
        df = df.dropna()

        if df.empty:
            # The slider starts at 0, so the default selection leaves nothing
            # to process; stop before handing an empty frame to the model.
            st.warning('هیچ داده‌ای برای پردازش باقی نمانده است؛ درصد دادگان یا ستون‌های انتخابی را بررسی نمایید')
            st.stop()

        # Prepare the documents and fit the topic model.
        _, center, _ = st.columns(3)
        with center:
            with st.spinner('در حال پردازش دادگان'):
                docs = tp_model.add_data(df)
            st.success('پردازش دادگان با موفقیت به پایان رسید')

            with st.spinner('در حال آموزش مدل'):
                tp_model.fit(docs)
            st.success('آموزش پایان یافت')

        # NOTE(review): the original indentation was lost; titles are assumed
        # to sit in the third column with charts/tables rendered full-width.
        # Confirm against the intended layout.
        _section_title(" فضای تاپیک‌ها ")
        fig = tp_model.get_vis_topics()
        st.plotly_chart(
            fig,
            use_container_width=True,
            theme="streamlit",  # Optional, this is already the default.
        )

        _section_title(" کلمات هر تاپیک ")
        fig = tp_model.get_barchart()
        st.plotly_chart(
            fig,
            use_container_width=True,
            theme="streamlit",  # Optional, this is already the default.
        )

        _section_title("لیست تاپیک‌ها")
        topics_info = tp_model.get_topic_info()
        st.write(topics_info)

        _section_title(" ابر کلمات ")
        figs = tp_model.get_wordcloud()
        for fig in figs:
            st.header("")
            st.markdown('topic:')
            st.pyplot(fig)