"""Streamlit demo page for browsing a corpus and its LDA topic model.

The script loads a raw CSV, a serialized gensim corpus, a gensim dictionary
and a trained LDA model from the working directory, then renders:

* the first 100 rows of the raw data,
* a pie chart of the number of documents per ``CATEGORY``,
* the interactive pyLDAvis topic visualization embedded as HTML.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyLDAvis.gensim_models as gensimvis
import streamlit as st
from gensim.corpora import Dictionary, MmCorpus
from gensim.models import LdaModel, Word2Vec
from pyLDAvis import prepared_data_to_html

# Load the raw data, corpus, dictionary, and model.
df = pd.read_csv("./raw_corpus.csv")
corpus = MmCorpus('./corpus.mm')
# Named `dictionary` rather than `dict` so the builtin is not shadowed.
dictionary = Dictionary.load('./livedoor_demo.dict')
lda = LdaModel.load('./lda_demo.model')

# Raw data preview (first 100 rows only).
st.caption("生データ一覧")
st.dataframe(df.iloc[:100])

# Pie chart of the per-category document counts.
st.caption("記事のカテゴリ")
fig, ax = plt.subplots()
category_counts = df[["CATEGORY", "DOCUMENT"]].groupby("CATEGORY").count()
category_counts.plot.pie(y="DOCUMENT", ax=ax, ylabel="", legend=False)
st.pyplot(fig)
# Streamlit re-executes this script on every rerun; close the figure so
# pyplot's global figure registry does not keep growing.
plt.close(fig)

# Visualize the topics with pyLDAvis and embed the generated HTML.
vis = gensimvis.prepare(lda, corpus, dictionary)
html_string = prepared_data_to_html(vis)
st.components.v1.html(html_string, width=1300, height=800)