|
import numpy as np |
|
import pandas as pd |
|
from gensim.corpora import Dictionary, MmCorpus |
|
from gensim.models import LdaModel, Word2Vec |
|
import matplotlib.pyplot as plt |
|
import streamlit as st |
|
from pyLDAvis import prepared_data_to_html |
|
import pyLDAvis.gensim_models as gensimvis |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Load the raw articles and the pre-built topic-model artifacts.
# NOTE(review): Streamlit re-runs this script top to bottom on every user
# interaction, so these loads (and the pyLDAvis preparation at the bottom)
# are repeated each time; consider Streamlit's caching decorators if the
# installed version provides them -- TODO confirm.
# ---------------------------------------------------------------------------
df = pd.read_csv("./raw_corpus.csv")
corpus = MmCorpus("./corpus.mm")
# Named `dictionary` rather than `dict` so the builtin type is not shadowed.
dictionary = Dictionary.load("./livedoor_demo.dict")
lda = LdaModel.load("./lda_demo.model")

# ---------------------------------------------------------------------------
# Raw data: one table per category, showing that category's first 10 rows.
# ---------------------------------------------------------------------------
st.caption("生データ一覧: カテゴリごとに10個を表示")

# Iterating the groupby yields each category's sub-frame directly; `.head(10)`
# selects rows by position inside the group, so it works for any index
# (label-based group indices must not be fed to the positional `.iloc`).
for _, category_rows in df.groupby("CATEGORY"):
    st.dataframe(category_rows.head(10), height=200)

# ---------------------------------------------------------------------------
# Per-category document counts: table on the left, pie chart on the right.
# ---------------------------------------------------------------------------
st.caption("記事のカテゴリ")

count = df[["CATEGORY", "DOCUMENT"]].groupby("CATEGORY").count()
col1, col2 = st.columns(2)
with col1:
    st.dataframe(count)
with col2:
    fig, ax = plt.subplots()
    count.plot.pie(y="DOCUMENT", ax=ax, ylabel="", legend=False)
    st.pyplot(fig)
    # The figure has been rendered; close it so figures do not accumulate in
    # pyplot's global registry across script re-runs.
    plt.close(fig)

# ---------------------------------------------------------------------------
# Interactive pyLDAvis view of the LDA model, embedded as raw HTML.
# ---------------------------------------------------------------------------
vis = gensimvis.prepare(lda, corpus, dictionary)
html_string = prepared_data_to_html(vis)
st.components.v1.html(html_string, width=1300, height=800)
|
|