khunamir committed on
Commit
3bdb790
1 Parent(s): bfdd99d

First Commit

.gitignore ADDED
@@ -0,0 +1 @@
+ Viz
CosmosData/Data.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10d412f2c213194de8fad35f72b2b89a2296791586efc2896cc37ae24d8cfee0
+ size 6097905
CosmosData/SciData.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8000870576a832b5687cbc9f79c6bf58d0fc812b86732627753369eacd7c39cd
+ size 12995836
CosmosData/SciTechData.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11ac166393d5c6c9a6fc0d5300402df108c73853ebaa3c00654703bc0d485fad
+ size 18207515
CosmosData/TechData.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a71736ae1115b557e58472c0e2cf0726ad0b02ddd22daeb30c49c4dca9b7220
+ size 6101253
CosmosData/bow_corpus.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:236080fb44c7b688b1939b8463029b082ea37c27c872fe39a3c378395eafea49
+ size 5583422
CosmosData/dictionary.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:365560d76985137c95f2007dc40e7f6fef511693ef26b7b1ad1393a9b539143c
+ size 290617
CosmosData/preprocessed_scitech.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a2dd754280f5543895399e85b3c78aa295a73c8dda23dadbc6db85fb155efa09
+ size 13832946
CosmosData/test_data.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdbc5902b11327dbb9edd8c5c254d5c7aa35e0a6e8eca4f2f67cf5b659169039
+ size 2874718
CosmosData/train_data.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53b3018b3d5e26d7e3ca900adf8ade7a05dfe6f143e7b1bc024a642311ad49ba
+ size 11024839
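
Note: the .pkl entries above are Git LFS pointer files; each hunk records only the LFS spec version, the sha256 oid of the real object, and its size in bytes. A minimal sketch of reading one of these pickles, assuming the actual binaries have been fetched (e.g. with git lfs pull) and that the pickle deserializes to a pandas object, as app.py below expects:

    import pickle

    # Assumes `git lfs pull` has replaced the 3-line pointer with the real binary;
    # otherwise pickle.load fails on the plain-text pointer file.
    with open('CosmosData/SciTechData.pkl', 'rb') as file:
        news = pickle.load(file)
    print(type(news), getattr(news, 'shape', None))
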
app.py ADDED
@@ -0,0 +1,477 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import math
+ import gensim
+ import pickle
+ import pyLDAvis
+ import pyLDAvis.gensim_models as gensimvis
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import matplotlib.pyplot as plt
+ from bokeh.plotting import figure
+ from plotly.subplots import make_subplots
+ from sklearn.manifold import TSNE
+ from gensim.parsing.preprocessing import STOPWORDS
+ from wordcloud import WordCloud
+
+ colors = ['peachpuff','lightskyblue','turquoise','darkorange','purple','olive','lightgreen','darkseagreen','maroon','teal',
+           'deepskyblue','red','mediumblue','indigo','goldenrod','mediumvioletred','pink','beige','rosybrown']
+
+ # Map the display names used in the UI to the directory prefixes used on disk, and back
+ SITE_KEY = {'Popular Science': 'PopSci', 'Discover Magazine': 'Discover', 'Cosmos Magazine': 'Cosmos'}
+ SITE_NAME = {v: k for k, v in SITE_KEY.items()}
+
+ st.set_page_config(layout="wide")
+
+ st.markdown("<h1 style='font-weight: normal'><b>Topic Model</b>: Science and Technology News</h1>", unsafe_allow_html=True)
+
+ def load_mpmt(site):
+     # Load the coherence sweeps over number of passes and number of topics
+     with open(f'./Models/{site}Models/{site.lower()}_lda_passes_train.pickle', 'rb') as file:
+         model_passes = pickle.load(file)
+
+     with open(f'./Models/{site}Models/{site.lower()}_lda_topics_train.pickle', 'rb') as file:
+         model_topics = pickle.load(file)
+
+     mp_df = pd.DataFrame(model_passes).transpose().iloc[0:50]
+     mp_df['coherence'] = mp_df['coherence'].astype(float)
+
+     mt_df = pd.DataFrame(model_topics).transpose().iloc[0:50]
+     mt_df['coherence'] = mt_df['coherence'].astype(float)
+
+     return mp_df, mt_df
+
+ def load_ex(site):
+     with open(f'./Models/{site}Models/{site.lower()}_extreme2.pickle', 'rb') as file:
+         model_extreme = pickle.load(file)
+
+     ex_df = pd.DataFrame(model_extreme).transpose()
+     ex_df['coherence'] = ex_df['coherence'].astype(float)
+     ex_df = ex_df.reset_index()
+
+     # Keep the model, corpus and dictionary of the run with the highest coherence
+     best_row = ex_df.iloc[ex_df['coherence'].idxmax()]
+     best_model = best_row['model']
+     bow_corpus = best_row['corpus']
+     dictionary = best_row['dictionary']
+
+     return ex_df, best_model, bow_corpus, dictionary
+
+ def load_model(site):
+     # Despite the name, this loads the preprocessed token series, not a model
+     with open(f'./{site}Data/preprocessed_scitech.pkl', 'rb') as file:
+         processed_series = pickle.load(file)
+
+     return processed_series
+
+ def load_related(site, bow_corpus, highest_top, best_model):
+     # best_model is passed explicitly instead of relying on the global defined later in the script
+     with open(f"./{site}Data/SciTechData.pkl", "rb") as file:
+         news = pickle.load(file)
+
+     dm_topic = []
+
+     # Dominant topic = the topic with the highest probability for each document
+     for corp in bow_corpus:
+         topic_percs = best_model.get_document_topics(corp)
+         dominant_topic = sorted(topic_percs, key=lambda x: x[1], reverse=True)[0][0]
+         dm_topic.append(dominant_topic)
+
+     news['dominant_topic'] = dm_topic
+
+     return news[news['dominant_topic'] == highest_top]['url'][:10]
+
+ def load_evaluation_graph(data, xlabel, ylabel, title):
+     if len(data) > 25:
+         # Sweep over passes/topics: one point per run
+         fig = px.line(data, x=list(range(1, len(data)+1)), y='coherence', title=title, labels={'x': xlabel, 'coherence': ylabel})
+         fig.add_hline(y=data['coherence'].max())
+         # The best run's index label is assumed to embed its number after an 'a' or an 's'
+         try:
+             vert_value = int(data['coherence'].idxmax().split('a')[1])
+         except (ValueError, IndexError):
+             vert_value = int(data['coherence'].idxmax().split('s')[1])
+     else:
+         # Extreme-filtering sweep, plotted against the 30-90% axis
+         fig = px.line(data[::-1], x=list(range(30, 100, 10)), y='coherence', title=title, labels={'x': xlabel, 'coherence': ylabel})
+         vert_value = int(data.reset_index()['coherence'].idxmax())
+         fig.update_xaxes(range=[30, 90])
+
+     fig.add_vline(x=vert_value)
+
+     return fig, vert_value
+
+ def load_cloud(processed_series):
+     stopwords = set(STOPWORDS)
+
+     # Join every tokenized document into one string for the cloud
+     all_words = ' '.join(' '.join(val) for val in processed_series)
+
+     wordcloud = WordCloud(width=1800, height=1600,
+                           background_color='white',
+                           stopwords=stopwords,
+                           min_font_size=10).generate(all_words)
+
+     # fig = plt.figure(figsize=(8, 8), facecolor=None)
+     # ax = fig.add_axes([2, 2, 10, 10])
+     # ax.imshow(wordcloud)
+     # ax.axis("off")
+     # fig.tight_layout(pad=0)
+
+     fig = px.imshow(wordcloud)
+
+     return fig
+
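+ # NOTE: load_cloud (above) is not called in the page flow below;
+ # load_cloud_each renders per-topic clouds instead.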
+ def load_cloud_each(model, site):
+     # Site-specific junk tokens left over from preprocessing
+     if site == 'Popular Science' or site == 'Cosmos Magazine':
+         words = ['u']
+     elif site == 'Discover Magazine':
+         words = ['nt', 'u', 've', 'm', 'll', 'd', 'rofl']
+     else:
+         words = []
+
+     stopwords = set(STOPWORDS)
+     stopwords.update(words)
+
+     num_topics = len(model.get_topics())
+     topics = model.show_topics(num_topics=num_topics, formatted=False)
+
+     # Top-3 words of each topic; only these get the topic colour, the rest stay grey
+     top3_per_topic = [[word for word, wt in topic_words[:3]] for _, topic_words in topics]
+
+     cloud = WordCloud(stopwords=stopwords,
+                       background_color='white',
+                       width=750,
+                       height=750,
+                       max_words=10,
+                       colormap='tab10',
+                       # n is resolved late, at generate time, so each cloud picks up the current topic
+                       color_func=lambda *args, **kwargs: color_func(*args, **kwargs, n=n, topics=top3_per_topic[n]),
+                       prefer_horizontal=1.0)
+
+     # Lay the clouds out five to a row
+     n = 0
+     while n < num_topics:
+         for col in st.columns(5):
+             if n >= num_topics:
+                 break
+             with col:
+                 fig = plt.figure(figsize=(1.5, 1.5))
+                 plt.title('Topic ' + str(n+1), fontdict=dict(size=6))
+                 plt.axis('off')
+                 topic_words = dict(topics[n][1])
+                 cloud.generate_from_frequencies(topic_words, max_font_size=400)
+                 plt.imshow(cloud)
+                 st.write(fig)
+             n += 1
+
+ def load_LDAvis(model, corpus, dictionary):
+     vis = gensimvis.prepare(model, corpus, dictionary)
+     html_string = pyLDAvis.prepared_data_to_html(vis)
+
+     return html_string
+
+ def load_topic_document_count(best_model, bow_corpus):
+     # Dominant topic for every document
+     dm_topic = []
+
+     for corp in bow_corpus:
+         topic_percs = best_model.get_document_topics(corp)
+         dominant_topic = sorted(topic_percs, key=lambda x: x[1], reverse=True)[0][0]
+         dm_topic.append(dominant_topic)
+
+     dm_df = pd.DataFrame(dm_topic, columns=['dominant_topic'])
+
+     # Top-3 words per topic, used as tick labels
+     topic_top3words = [(i, topic) for i, topics in best_model.show_topics(formatted=False, num_topics=-1) for j, (topic, wt) in enumerate(topics) if j < 3]
+
+     df_top3words_stacked = pd.DataFrame(topic_top3words, columns=['topic_id', 'words'])
+     df_top3words = df_top3words_stacked.groupby('topic_id').agg(', '.join)
+     df_top3words.reset_index(level=0, inplace=True)
+
+     count_df = pd.DataFrame(dm_df.groupby('dominant_topic').dominant_topic.agg('count').to_frame('COUNT').reset_index()['COUNT'])
+     count_df['top3'] = list(df_top3words['words'])
+
+     fig = px.histogram(dm_df,
+                        x='dominant_topic',
+                        labels={'dominant_topic': 'Dominant Topic', 'count': 'Number of Documents'},
+                        height=500,
+                        width=1400,
+                        title='Document Count by Dominant Topic')
+     fig.update_layout(yaxis_title='Number of Documents', bargap=0.2)
+     fig.update_layout(
+         margin=dict(b=40),
+         xaxis=dict(
+             tickmode='array',
+             tickvals=list(range(dm_df['dominant_topic'].max()+1)),
+             ticktext=df_top3words['words']
+         )
+     )
+
+     # Return the figure, the top-3 words of the most common topic, and that topic's id
+     # (assumes every topic id appears as a dominant topic at least once)
+     return fig, count_df[count_df['COUNT'] == count_df['COUNT'].max()]['top3'].values[0], count_df['COUNT'].idxmax()
+
+ def load_document_count(data):
+     doc_len = [len(d) for d in data]
+
+     fifth = round(np.quantile(doc_len, q=0.05))
+     ninefifth = round(np.quantile(doc_len, q=0.95))
+
+     text = "Mean : " + str(round(np.mean(doc_len))) \
+         + "<br>Median : " + str(round(np.median(doc_len))) \
+         + "<br>Std dev. : " + str(round(np.std(doc_len))) \
+         + "<br>5th percentile : " + str(fifth) \
+         + "<br>95th percentile : " + str(ninefifth)
+
+     fig = px.histogram(doc_len, labels={"value": "Document Word Count"}, height=500, width=1400, title='Distribution of Document Word Counts')
+     fig.add_annotation(x=0.95, xref='paper', y=0.95, yref='paper', text=text, showarrow=False, bgcolor="#F4F4F4", opacity=0.8, borderpad=8, borderwidth=2, bordercolor="#DDDDDD", align='left')
+     fig.update_layout(yaxis_title='Number of Documents', showlegend=False)
+
+     return fig, fifth, ninefifth
+
+ def color_func(word, font_size, position, orientation, font_path, random_state, n, topics):
+     # WordCloud supplies the first six arguments; n and topics are bound via the lambda above
+     if word in topics:
+         return colors[n]
+     else:
+         return 'lightgrey'
+
+ def load_topic_word_prob(best_model):
+     # Collect the per-topic word probabilities directly from the model;
+     # formatted=False avoids parsing the string output with regexes
+     records = []
+     for topic_id, word_probs in best_model.show_topics(num_topics=-1, formatted=False):
+         for word, prob in word_probs:
+             records.append({'topic': topic_id, 'words': word, 'probability': float(prob)})
+
+     topic_prob = pd.DataFrame(records)
+     new_df = topic_prob.set_index(['topic'])
+
+     rows = math.ceil(best_model.num_topics / 5)
+
+     fig = make_subplots(
+         rows=rows,
+         cols=5,
+         shared_yaxes=True,
+         subplot_titles=[f'Topic {n}' for n in range(1, best_model.num_topics+1)]
+     )
+
+     # One bar chart of the top-10 word probabilities per topic, five per row
+     n = 0
+     for i in range(1, rows+1):
+         for j in range(1, 6):
+             if n < best_model.num_topics:
+                 fig.add_trace(
+                     go.Bar(x=new_df.loc[n]['words'], y=new_df.loc[n]['probability']),
+                     row=i, col=j
+                 )
+                 n += 1
+
+     fig.update_layout(height=1000, width=1400, title_text="Topic Word Probabilities", showlegend=False, margin=dict(b=5))
+
+     return fig
+
+ def load_tSNE(best_model, bow_corpus):
+     # Topic weights per document, keyed by topic id so that columns stay aligned
+     # even when a topic is missing from a document's distribution
+     topic_weights = [dict(best_model.get_document_topics(corp)) for corp in bow_corpus]
+
+     weight_df = pd.DataFrame(topic_weights).fillna(0)
+     weight_df = weight_df[sorted(weight_df.columns)]
+     arr = weight_df.values
+
+     # Keep the well separated points (optional)
+     arr = arr[np.amax(arr, axis=1) > 0.35]
+
+     # Dominant topic number in each doc
+     topic_num = np.argmax(arr, axis=1)
+
+     # t-SNE dimension reduction
+     tsne_model = TSNE(n_components=2, verbose=1, random_state=0, angle=.99, init='pca')
+     tsne_lda = tsne_model.fit_transform(arr)
+
+     # Plot the topic clusters using Bokeh, reusing the module-level colour palette.
+     # width/height is the Bokeh 3.x naming (plot_width/plot_height in 2.x).
+     n_topics = best_model.num_topics
+     mycolors = np.array(colors)
+     plot = figure(title="t-SNE Clustering of {} LDA Topics".format(n_topics),
+                   width=900, height=700)
+     plot.scatter(x=tsne_lda[:, 0], y=tsne_lda[:, 1], color=mycolors[topic_num])
+
+     return plot
+
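+ # NOTE: load_tSNE is not wired into the page flow below. One hedged way to surface it,
+ # assuming Streamlit's st.bokeh_chart supports the installed Bokeh version, would be:
+ #     st.bokeh_chart(load_tSNE(best_model, bow_corpus))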
+ site = st.selectbox(
+     'Select which site to analyze',
+     ('Popular Science', 'Discover Magazine', 'Cosmos Magazine'),
+ )
+
+ vert_space = '<div style="padding: 20px 5px;"></div>'
+ st.markdown(vert_space, unsafe_allow_html=True)
+
+ if site:
+     site = SITE_KEY[site]
+
+     mp_df, mt_df = load_mpmt(site)
+
+     st.subheader("How good is the model?")
+
+     passes_graph, passes_vert = load_evaluation_graph(mp_df, 'Number of Passes', 'Topic Coherence', 'Topic Coherence vs Number of Passes')
+     passes_graph.update_layout(width=650)
+
+     topics_graph, topics_vert = load_evaluation_graph(mt_df, 'Number of Topics', 'Topic Coherence', 'Topic Coherence vs Number of Topics')
+     topics_graph.update_layout(width=650)
+
+     mdt_best = round(mt_df['coherence'].max(), 4)
+
+     st.markdown(f"The **:blue[best performing model]** obtained a coherence score of **:blue[{mdt_best}]**! \n \
+         The model performed best with {passes_vert} passes over the whole corpus and {topics_vert} topics.")
+
+     col1, col2 = st.columns(2)
+
+     with col1:
+         st.write(passes_graph)
+
+     with col2:
+         st.write(topics_graph)
+
+     ex_df, best_model, bow_corpus, dictionary = load_ex(site)
+
+     st.subheader("The model also performs better when extreme word occurrences are filtered!")
+
+     ex_best = round(ex_df['coherence'].max(), 4)
+     # Relative improvement over the previous best, as a percentage
+     imp = round((ex_best - mdt_best) / mdt_best * 100, 2)
+
+     st.markdown(f"This time, the **:blue[best performing model]** obtained a coherence score of **:blue[{ex_best}]**. \n \
+         An increase of another **:blue[{imp}]**%!")
+
+     best_graph, best_vert = load_evaluation_graph(ex_df, 'Percentage of Documents Used to Filter', 'Topic Coherence', 'Topic Coherence vs Percentage of Documents')
+
+     best_graph.update_layout(width=1400)
+
+     st.write(best_graph)
+
+     processed_series = load_model(site)
+
+     site = SITE_NAME[site]
+
+     document_count, fifth, ninefifth = load_document_count(processed_series)
+     topic_document_count, top_3, top_i = load_topic_document_count(best_model, bow_corpus)
+
+     top_3 = [w.strip() for w in top_3.split(',')]
+
+     st.subheader("How long are the documents?")
+
+     st.markdown(f"Most documents in {site} are between **:blue[{fifth}]** and **:blue[{ninefifth}]** words long!")
+
+     st.write(document_count)
+
+     st.subheader(f"What are the most discussed topics in {site}?")
+
+     st.markdown(f"The most discussed topics are related to the keywords **:blue[{top_3[0].upper()}]**, **:blue[{top_3[1].upper()}]** and **:blue[{top_3[2].upper()}]**.")
+     st.write(topic_document_count)
+
+     site = SITE_KEY[site]
+
+     related_url = load_related(site, bow_corpus, top_i, best_model)
+
+     st.subheader("These articles have the highest probability of featuring the topic above!")
+
+     st.markdown('<div style="padding: 25px 5px;"></div>', unsafe_allow_html=True)
+
+     st.write(related_url, width=1000)
+
+     st.markdown('<div style="padding: 25px 5px;"></div>', unsafe_allow_html=True)
+
+     st.subheader("Explore the topics below!")
+
+     st.markdown(vert_space, unsafe_allow_html=True)
+
+     site = SITE_NAME[site]
+
+     load_cloud_each(best_model, site)
+
+     st.markdown('<div style="padding: 40px 5px;"></div>', unsafe_allow_html=True)
+
+     lda_vis = load_LDAvis(best_model, bow_corpus, dictionary)
+
+     st.subheader("LDAVis Visualization")
+     st.markdown('<div style="padding: 20px 5px;"></div>', unsafe_allow_html=True)
+     st.components.v1.html(lda_vis, height=1100, width=1400)
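
For reference, the coherence scores unpickled by load_mpmt and load_ex are produced upstream, not in this file. A minimal sketch of how such scores are typically computed with gensim (hypothetical toy texts; the actual metric and corpus behind the Models/ pickles are not part of this commit):

    from gensim.corpora import Dictionary
    from gensim.models import LdaModel, CoherenceModel

    # Hypothetical tokenized documents standing in for the preprocessed news articles
    texts = [['space', 'telescope', 'galaxy', 'star'],
             ['chip', 'silicon', 'transistor', 'wafer']]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(t) for t in texts]

    # Train one LDA run, then score it; sweeping passes/num_topics over many
    # such runs would yield tables shaped like those loaded in load_mpmt
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=10, random_state=0)
    coherence = CoherenceModel(model=lda, texts=texts, dictionary=dictionary, coherence='c_v').get_coherence()
    print(round(coherence, 4))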