derek-thomas HF staff committed on
Commit
356174d
•
1 Parent(s): d5f15cb

Updating topic_word

Browse files
app/pages/01_Topic_Explorer_📚.py CHANGED
@@ -44,12 +44,13 @@ def main():
44
  The `y` axis shows which words are closest to a topic centroid. The `x` axis shows how correlated they are.""")
45
 
46
  topic_num = st.sidebar.slider("Topic Number", 0, 19, value=0)
 
47
  fig = go.Figure(go.Bar(
48
  x=st.session_state.model.topic_word_scores_reduced[topic_num][::-1],
49
  y=st.session_state.model.topic_words_reduced[topic_num][::-1],
50
  orientation='h'))
51
  fig.update_layout(
52
- title=f'Words for Topic {topic_num}',
53
  yaxis_title='Top 20 topic words',
54
  xaxis_title='Distance to topic centroid'
55
  )
 
44
  The `y` axis shows which words are closest to a topic centroid. The `x` axis shows how correlated they are.""")
45
 
46
  topic_num = st.sidebar.slider("Topic Number", 0, 19, value=0)
47
+ topic_num_str = f"{topic_num:02}"
48
  fig = go.Figure(go.Bar(
49
  x=st.session_state.model.topic_word_scores_reduced[topic_num][::-1],
50
  y=st.session_state.model.topic_words_reduced[topic_num][::-1],
51
  orientation='h'))
52
  fig.update_layout(
53
+ title=f'Words for Topic {topic_num_str}: {st.session_state.topic_str_to_word[topic_num_str]}',
54
  yaxis_title='Top 20 topic words',
55
  xaxis_title='Distance to topic centroid'
56
  )
app/pages/02_Document_Explorer_📖.py CHANGED
@@ -72,6 +72,7 @@ def main():
72
  st.button("Reset", help="Will Reset the selected points and the selected topics", on_click=reset)
73
  data_to_model = st.session_state.data.sort_values(by='topic_id',
74
  ascending=True) # to make legend sorted https://bioinformatics.stackexchange.com/a/18847
 
75
  fig = px.scatter(data_to_model, x='x', y='y', color='topic_id', template='plotly_dark',
76
  hover_data=['id', 'topic_id', 'x', 'y'])
77
  st.session_state.selected_points = plotly_events(fig, select_event=True, click_event=False)
@@ -84,7 +85,9 @@ def main():
84
  filter_df()
85
  cols = ['id', 'topic_id', 'documents']
86
  data = st.session_state.selected_data[cols]
87
- builder = GridOptionsBuilder.from_dataframe(data)
 
 
88
  builder.configure_pagination()
89
  go = builder.build()
90
  AgGrid(st.session_state.selected_data[cols], theme='streamlit', gridOptions=go,
 
72
  st.button("Reset", help="Will Reset the selected points and the selected topics", on_click=reset)
73
  data_to_model = st.session_state.data.sort_values(by='topic_id',
74
  ascending=True) # to make legend sorted https://bioinformatics.stackexchange.com/a/18847
75
+ data_to_model['topic_id'].replace(st.session_state.topic_str_to_word, inplace=True)
76
  fig = px.scatter(data_to_model, x='x', y='y', color='topic_id', template='plotly_dark',
77
  hover_data=['id', 'topic_id', 'x', 'y'])
78
  st.session_state.selected_points = plotly_events(fig, select_event=True, click_event=False)
 
85
  filter_df()
86
  cols = ['id', 'topic_id', 'documents']
87
  data = st.session_state.selected_data[cols]
88
+ data['topic_word'] = data.topic_id.replace(st.session_state.topic_str_to_word)
89
+ ordered_cols = ['id', 'topic_id', 'topic_word', 'documents']
90
+ builder = GridOptionsBuilder.from_dataframe(data[ordered_cols])
91
  builder.configure_pagination()
92
  go = builder.build()
93
  AgGrid(st.session_state.selected_data[cols], theme='streamlit', gridOptions=go,
app/pages/03_Semantic_Search_🔍.py CHANGED
@@ -89,7 +89,9 @@ def main():
89
 
90
  with tab1:
91
  cols = ['id', 'document_scores', 'topic_id', 'documents']
92
- builder = GridOptionsBuilder.from_dataframe(st.session_state.data_to_model_without_point.loc[:, cols])
 
 
93
  builder.configure_pagination()
94
  builder.configure_column('document_scores', type=["numericColumn", "numberColumnFilter", "customNumericFormat"],
95
  precision=2)
 
89
 
90
  with tab1:
91
  cols = ['id', 'document_scores', 'topic_id', 'documents']
92
+ data = st.session_state.data_to_model_without_point.loc[:, cols]
93
+ data['topic_word'] = data.topic_id.replace(st.session_state.topic_str_to_word)
94
+ builder = GridOptionsBuilder.from_dataframe(data)
95
  builder.configure_pagination()
96
  builder.configure_column('document_scores', type=["numericColumn", "numberColumnFilter", "customNumericFormat"],
97
  precision=2)
app/utilities.py CHANGED
@@ -35,6 +35,9 @@ def initialization():
35
  topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
36
  topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
37
  st.session_state.topics = topics
 
 
 
38
 
39
  if 'selected_points' not in st.session_state:
40
  st.session_state.selected_points = []
 
35
  topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
36
  topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
37
  st.session_state.topics = topics
38
+ topics_dict = topics[['topic_id', 'topic_0']].to_dict()
39
+ topic_str_to_word = {topics_dict['topic_id'][i]: topics_dict['topic_0'][i] for i in range(20)}
40
+ st.session_state.topic_str_to_word = topic_str_to_word
41
 
42
  if 'selected_points' not in st.session_state:
43
  st.session_state.selected_points = []