# File: top2vec/app/utilities.py
# Last commit: "Updating topic_word" (356174d) by derek-thomas (HF staff)
# Size: 1.66 kB
from logging import getLogger
from pathlib import Path
import joblib
import pandas as pd
import streamlit as st
from top2vec import Top2Vec
# Module-level logger named after this module.
logger = getLogger(__name__)
# Parent of the directory that contains this file; used as the base for the
# models/ and data/ paths built in this module.
proj_dir = Path(__file__).parents[1]
def initialization():
    """Populate ``st.session_state`` with the resources the app needs.

    Each group of keys is loaded only when its guard key is missing from the
    session state, so repeated calls skip work that has already been done:

    * ``model`` / ``umap_model`` -- the Top2Vec model loaded from
      ``models/model.pkl`` (hierarchically reduced to 20 topics) and the
      object stored in ``models/umap.sav``.
    * ``data`` / ``selected_data`` / ``all_topics`` -- the contents of
      ``data/data.csv`` with ``topic_id`` rendered as a zero-padded two-digit
      string, the same frame as the initial selection, and the unique ids.
    * ``topics`` / ``topic_str_to_word`` -- the contents of
      ``data/topics.csv`` (same ``topic_id`` formatting) and a dict mapping
      each ``topic_id`` string to that row's ``topic_0`` value.
    * ``selected_points`` -- initialised to an empty list.

    All files are resolved relative to ``proj_dir``.
    """
    with st.spinner("Loading app..."):
        state = st.session_state

        if 'model' not in state:
            logger.info("loading model...")
            # Resolve against proj_dir like every other artifact, so loading
            # does not depend on the process's current working directory.
            model = Top2Vec.load(str(proj_dir / 'models' / 'model.pkl'))
            model._check_model_status()
            model.hierarchical_topic_reduction(num_topics=20)
            state.model = model
            state.umap_model = joblib.load(proj_dir / 'models' / 'umap.sav')

        if 'data' not in state:
            logger.info("loading data...")
            data = pd.read_csv(proj_dir / 'data' / 'data.csv')
            # Topic ids are kept as zero-padded strings ("00", "01", ...).
            data['topic_id'] = data['topic_id'].apply(lambda x: f'{x:02d}')
            state.data = data
            state.selected_data = data
            state.all_topics = list(data.topic_id.unique())

        if 'topics' not in state:
            logger.info("loading topics...")
            topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
            topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
            state.topics = topics
            # Pair the columns row by row instead of indexing a fixed
            # range(20): this works for any number of rows in topics.csv.
            state.topic_str_to_word = dict(
                zip(topics['topic_id'], topics['topic_0'])
            )

        if 'selected_points' not in state:
            state.selected_points = []