Spaces:
Sleeping
Sleeping
Yara Kyrychenko
committed on
Commit
•
17e20d0
1
Parent(s):
8bf791d
Helper
Browse files- .DS_Store +0 -0
- dicts/.DS_Store +0 -0
- dicts/stopwords.txt +485 -0
- helper.py +183 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
dicts/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
dicts/stopwords.txt
ADDED
@@ -0,0 +1,485 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
doesn't
|
2 |
+
was
|
3 |
+
he's
|
4 |
+
shall
|
5 |
+
didn't
|
6 |
+
for
|
7 |
+
my
|
8 |
+
being
|
9 |
+
hers
|
10 |
+
no
|
11 |
+
whom
|
12 |
+
yours
|
13 |
+
but
|
14 |
+
to
|
15 |
+
where
|
16 |
+
shouldn't
|
17 |
+
their
|
18 |
+
again
|
19 |
+
would
|
20 |
+
not
|
21 |
+
been
|
22 |
+
wasn't
|
23 |
+
against
|
24 |
+
ought
|
25 |
+
also
|
26 |
+
with
|
27 |
+
at
|
28 |
+
www
|
29 |
+
same
|
30 |
+
then
|
31 |
+
this
|
32 |
+
him
|
33 |
+
haven't
|
34 |
+
is
|
35 |
+
most
|
36 |
+
there
|
37 |
+
ourselves
|
38 |
+
her
|
39 |
+
otherwise
|
40 |
+
some
|
41 |
+
after
|
42 |
+
you
|
43 |
+
what
|
44 |
+
until
|
45 |
+
hence
|
46 |
+
our
|
47 |
+
only
|
48 |
+
shan't
|
49 |
+
they
|
50 |
+
should
|
51 |
+
before
|
52 |
+
having
|
53 |
+
since
|
54 |
+
get
|
55 |
+
had
|
56 |
+
http
|
57 |
+
they'll
|
58 |
+
on
|
59 |
+
your
|
60 |
+
i've
|
61 |
+
and
|
62 |
+
yourselves
|
63 |
+
those
|
64 |
+
them
|
65 |
+
such
|
66 |
+
i'd
|
67 |
+
in
|
68 |
+
when's
|
69 |
+
don't
|
70 |
+
where's
|
71 |
+
she'd
|
72 |
+
we
|
73 |
+
weren't
|
74 |
+
a
|
75 |
+
down
|
76 |
+
the
|
77 |
+
how's
|
78 |
+
were
|
79 |
+
we're
|
80 |
+
isn't
|
81 |
+
when
|
82 |
+
ours
|
83 |
+
they've
|
84 |
+
just
|
85 |
+
it's
|
86 |
+
very
|
87 |
+
between
|
88 |
+
that
|
89 |
+
an
|
90 |
+
be
|
91 |
+
yourself
|
92 |
+
we'd
|
93 |
+
into
|
94 |
+
than
|
95 |
+
theirs
|
96 |
+
why's
|
97 |
+
k
|
98 |
+
has
|
99 |
+
let's
|
100 |
+
out
|
101 |
+
therefore
|
102 |
+
wouldn't
|
103 |
+
you'd
|
104 |
+
while
|
105 |
+
could
|
106 |
+
you'll
|
107 |
+
ever
|
108 |
+
himself
|
109 |
+
during
|
110 |
+
his
|
111 |
+
that's
|
112 |
+
or
|
113 |
+
you've
|
114 |
+
he'd
|
115 |
+
own
|
116 |
+
all
|
117 |
+
through
|
118 |
+
r
|
119 |
+
can
|
120 |
+
she's
|
121 |
+
they're
|
122 |
+
hadn't
|
123 |
+
like
|
124 |
+
he
|
125 |
+
me
|
126 |
+
themselves
|
127 |
+
i
|
128 |
+
i'm
|
129 |
+
which
|
130 |
+
does
|
131 |
+
nor
|
132 |
+
above
|
133 |
+
we'll
|
134 |
+
have
|
135 |
+
mustn't
|
136 |
+
over
|
137 |
+
myself
|
138 |
+
however
|
139 |
+
both
|
140 |
+
below
|
141 |
+
each
|
142 |
+
few
|
143 |
+
about
|
144 |
+
there's
|
145 |
+
so
|
146 |
+
under
|
147 |
+
from
|
148 |
+
i'll
|
149 |
+
you're
|
150 |
+
as
|
151 |
+
she'll
|
152 |
+
other
|
153 |
+
cannot
|
154 |
+
else
|
155 |
+
who
|
156 |
+
who's
|
157 |
+
more
|
158 |
+
by
|
159 |
+
it
|
160 |
+
what's
|
161 |
+
they'd
|
162 |
+
off
|
163 |
+
are
|
164 |
+
too
|
165 |
+
if
|
166 |
+
we've
|
167 |
+
am
|
168 |
+
any
|
169 |
+
once
|
170 |
+
won't
|
171 |
+
hasn't
|
172 |
+
can't
|
173 |
+
because
|
174 |
+
here
|
175 |
+
here's
|
176 |
+
did
|
177 |
+
herself
|
178 |
+
of
|
179 |
+
these
|
180 |
+
couldn't
|
181 |
+
he'll
|
182 |
+
itself
|
183 |
+
up
|
184 |
+
its
|
185 |
+
further
|
186 |
+
do
|
187 |
+
she
|
188 |
+
how
|
189 |
+
com
|
190 |
+
doing
|
191 |
+
aren't
|
192 |
+
why
|
193 |
+
и
|
194 |
+
в
|
195 |
+
во
|
196 |
+
не
|
197 |
+
что
|
198 |
+
он
|
199 |
+
на
|
200 |
+
я
|
201 |
+
с
|
202 |
+
со
|
203 |
+
как
|
204 |
+
а
|
205 |
+
то
|
206 |
+
все
|
207 |
+
она
|
208 |
+
так
|
209 |
+
его
|
210 |
+
но
|
211 |
+
да
|
212 |
+
ты
|
213 |
+
к
|
214 |
+
у
|
215 |
+
же
|
216 |
+
вы
|
217 |
+
за
|
218 |
+
бы
|
219 |
+
по
|
220 |
+
только
|
221 |
+
ее
|
222 |
+
мне
|
223 |
+
было
|
224 |
+
вот
|
225 |
+
от
|
226 |
+
меня
|
227 |
+
еще
|
228 |
+
нет
|
229 |
+
о
|
230 |
+
из
|
231 |
+
ему
|
232 |
+
теперь
|
233 |
+
когда
|
234 |
+
даже
|
235 |
+
ну
|
236 |
+
вдруг
|
237 |
+
ли
|
238 |
+
если
|
239 |
+
уже
|
240 |
+
или
|
241 |
+
ни
|
242 |
+
быть
|
243 |
+
был
|
244 |
+
него
|
245 |
+
до
|
246 |
+
вас
|
247 |
+
нибудь
|
248 |
+
опять
|
249 |
+
уж
|
250 |
+
вам
|
251 |
+
ведь
|
252 |
+
там
|
253 |
+
потом
|
254 |
+
себя
|
255 |
+
ничего
|
256 |
+
ей
|
257 |
+
может
|
258 |
+
они
|
259 |
+
тут
|
260 |
+
где
|
261 |
+
есть
|
262 |
+
надо
|
263 |
+
ней
|
264 |
+
для
|
265 |
+
мы
|
266 |
+
тебя
|
267 |
+
их
|
268 |
+
чем
|
269 |
+
была
|
270 |
+
сам
|
271 |
+
чтоб
|
272 |
+
без
|
273 |
+
будто
|
274 |
+
чего
|
275 |
+
раз
|
276 |
+
тоже
|
277 |
+
себе
|
278 |
+
под
|
279 |
+
будет
|
280 |
+
ж
|
281 |
+
тогда
|
282 |
+
кто
|
283 |
+
этот
|
284 |
+
того
|
285 |
+
потому
|
286 |
+
этого
|
287 |
+
какой
|
288 |
+
совсем
|
289 |
+
ним
|
290 |
+
здесь
|
291 |
+
этом
|
292 |
+
один
|
293 |
+
почти
|
294 |
+
мой
|
295 |
+
тем
|
296 |
+
чтобы
|
297 |
+
нее
|
298 |
+
сейчас
|
299 |
+
были
|
300 |
+
куда
|
301 |
+
зачем
|
302 |
+
всех
|
303 |
+
никогда
|
304 |
+
можно
|
305 |
+
при
|
306 |
+
наконец
|
307 |
+
два
|
308 |
+
об
|
309 |
+
другой
|
310 |
+
хоть
|
311 |
+
после
|
312 |
+
над
|
313 |
+
больше
|
314 |
+
тот
|
315 |
+
через
|
316 |
+
эти
|
317 |
+
нас
|
318 |
+
про
|
319 |
+
всего
|
320 |
+
них
|
321 |
+
какая
|
322 |
+
много
|
323 |
+
разве
|
324 |
+
три
|
325 |
+
эту
|
326 |
+
моя
|
327 |
+
впрочем
|
328 |
+
хорошо
|
329 |
+
свою
|
330 |
+
этой
|
331 |
+
это
|
332 |
+
перед
|
333 |
+
иногда
|
334 |
+
лучше
|
335 |
+
чуть
|
336 |
+
том
|
337 |
+
нельзя
|
338 |
+
такой
|
339 |
+
им
|
340 |
+
более
|
341 |
+
всегда
|
342 |
+
конечно
|
343 |
+
всю
|
344 |
+
между
|
345 |
+
rt
|
346 |
+
amp
|
347 |
+
авжеж
|
348 |
+
адже
|
349 |
+
але
|
350 |
+
або
|
351 |
+
а
|
352 |
+
б
|
353 |
+
без
|
354 |
+
був
|
355 |
+
була
|
356 |
+
були
|
357 |
+
було
|
358 |
+
бути
|
359 |
+
більш
|
360 |
+
вам
|
361 |
+
вас
|
362 |
+
весь
|
363 |
+
вздовж
|
364 |
+
ви
|
365 |
+
вниз
|
366 |
+
внизу
|
367 |
+
вона
|
368 |
+
вони
|
369 |
+
воно
|
370 |
+
все
|
371 |
+
всередині
|
372 |
+
всіх
|
373 |
+
всі
|
374 |
+
від
|
375 |
+
він
|
376 |
+
вже
|
377 |
+
ваш
|
378 |
+
вами
|
379 |
+
вас
|
380 |
+
вашого
|
381 |
+
ваша
|
382 |
+
ваші
|
383 |
+
да
|
384 |
+
давай
|
385 |
+
давати
|
386 |
+
де
|
387 |
+
дещо
|
388 |
+
для
|
389 |
+
до
|
390 |
+
з
|
391 |
+
завжди
|
392 |
+
замість
|
393 |
+
й
|
394 |
+
коли
|
395 |
+
ледве
|
396 |
+
майже
|
397 |
+
ми
|
398 |
+
навколо
|
399 |
+
навіть
|
400 |
+
нам
|
401 |
+
наш
|
402 |
+
наші
|
403 |
+
наша
|
404 |
+
нашого
|
405 |
+
наших
|
406 |
+
нас
|
407 |
+
от
|
408 |
+
отже
|
409 |
+
отож
|
410 |
+
поза
|
411 |
+
про
|
412 |
+
під
|
413 |
+
та
|
414 |
+
так
|
415 |
+
такий
|
416 |
+
також
|
417 |
+
те
|
418 |
+
ти
|
419 |
+
тобто
|
420 |
+
тож
|
421 |
+
тощо
|
422 |
+
той
|
423 |
+
хоча
|
424 |
+
це
|
425 |
+
цей
|
426 |
+
чи
|
427 |
+
чого
|
428 |
+
чий
|
429 |
+
що
|
430 |
+
як
|
431 |
+
які
|
432 |
+
який
|
433 |
+
якої
|
434 |
+
є
|
435 |
+
і
|
436 |
+
із
|
437 |
+
інших
|
438 |
+
їх
|
439 |
+
її
|
440 |
+
їй
|
441 |
+
така
|
442 |
+
просто
|
443 |
+
буде
|
444 |
+
бо
|
445 |
+
може
|
446 |
+
ніби
|
447 |
+
мені
|
448 |
+
зі
|
449 |
+
тебе
|
450 |
+
теж
|
451 |
+
свій
|
452 |
+
мої
|
453 |
+
щоб
|
454 |
+
ще
|
455 |
+
ні
|
456 |
+
поки
|
457 |
+
хто
|
458 |
+
мій
|
459 |
+
чому
|
460 |
+
щось
|
461 |
+
хтось
|
462 |
+
моєму
|
463 |
+
твоєму
|
464 |
+
моїй
|
465 |
+
мого
|
466 |
+
мою
|
467 |
+
мене
|
468 |
+
тієї
|
469 |
+
тією
|
470 |
+
тої
|
471 |
+
тій
|
472 |
+
ту
|
473 |
+
тою
|
474 |
+
тим
|
475 |
+
того
|
476 |
+
тому
|
477 |
+
тебе
|
478 |
+
якщо
|
479 |
+
всім
|
480 |
+
ті
|
481 |
+
дуже
|
482 |
+
собі
|
483 |
+
собою
|
484 |
+
себе
|
485 |
+
такі
|
helper.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from river import stream
|
2 |
+
from river import cluster
|
3 |
+
|
4 |
+
class River:
    """Wrap an incremental clustering model behind ``partial_fit`` / ``labels_``.

    The wrapped ``model`` only needs two methods: ``learn_one(x)`` and
    ``predict_one(x)``. Samples are handed to it one at a time in the form
    produced by ``river.stream.iter_array``.
    """

    def __init__(self, model):
        """Store the model that will be trained incrementally."""
        self.model = model

    def partial_fit(self, umap_embeddings):
        """Update the model with a batch of embeddings, then label that batch.

        Arguments:
            umap_embeddings: Array-like batch of samples accepted by
                             ``stream.iter_array``.

        Returns:
            ``self``; the predicted labels for this batch, in input order,
            are stored in ``self.labels_``.
        """
        # First pass: feed every sample of the batch to the model.
        for umap_embedding, _ in stream.iter_array(umap_embeddings):
            # learn_one() is not guaranteed to return the estimator (newer
            # river releases return None). Rebinding unconditionally would
            # replace the model with None after the first sample, so only
            # rebind when something was actually returned.
            updated_model = self.model.learn_one(umap_embedding)
            if updated_model is not None:
                self.model = updated_model

        # Second pass: label the batch only after the whole batch was learned.
        self.labels_ = [
            self.model.predict_one(umap_embedding)
            for umap_embedding, _ in stream.iter_array(umap_embeddings)
        ]
        return self
|
19 |
+
|
20 |
+
|
21 |
+
import pandas as pd
|
22 |
+
from typing import List
|
23 |
+
import plotly.graph_objects as go
|
24 |
+
from sklearn.preprocessing import normalize
|
25 |
+
|
26 |
+
|
27 |
+
def visualize_topics_over_time(topic_model,
                               topics_over_time: pd.DataFrame,
                               top_n_topics: int = None,
                               topics: List[int] = None,
                               normalize_frequency: bool = False,
                               custom_labels: bool = False,
                               title: str = "<b>Topics over Time</b>",
                               width: int = 860,
                               height: int = 600) -> go.Figure:
    """Draw one line per topic showing its frequency over time.

    Based on BERTopic's function
    https://github.com/MaartenGr/BERTopic/blob/809414b88ca3f12a46728069d098d82345986489/bertopic/plotting/_topics_over_time.py

    Arguments:
        topic_model: Fitted topic model. This function reads
                     ``get_topic_freq()``, ``topic_labels_``,
                     ``custom_labels_`` and ``_outliers`` from it.
        topics_over_time: DataFrame with the columns ``Topic``, ``Timestamp``,
                          ``Words`` and ``Frequency``. It is not modified.
        top_n_topics: If given (and ``topics`` is None), keep only the first
                      ``top_n_topics`` rows of ``get_topic_freq()`` after the
                      outlier topic -1 has been removed.
        topics: Explicit list of topic ids to plot; takes precedence over
                ``top_n_topics``.
        normalize_frequency: If True, each topic's frequency vector is scaled
                             with ``sklearn.preprocessing.normalize`` (default
                             settings) before plotting.
        custom_labels: If True and the model has ``custom_labels_``, use them
                       as trace names instead of the default topic labels.
        title: Figure title (Plotly pseudo-HTML allowed).
        width: Figure width in pixels.
        height: Figure height in pixels.

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``Scatter`` trace per
        selected topic that occurs in ``topics_over_time``.
    """
    # Select topics based on top_n and topics args; the outlier topic (-1)
    # is never picked automatically.
    freq_df = topic_model.get_topic_freq()
    freq_df = freq_df.loc[freq_df.Topic != -1, :]
    if topics is not None:
        selected_topics = list(topics)
    elif top_n_topics is not None:
        selected_topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
    else:
        selected_topics = sorted(freq_df.Topic.to_list())

    # Map topic id -> legend name.
    if topic_model.custom_labels_ is not None and custom_labels:
        # custom_labels_ is positional; _outliers shifts the index past the
        # outlier entry when one exists.
        topic_names = {key: topic_model.custom_labels_[key + topic_model._outliers]
                       for key in topic_model.topic_labels_}
    else:
        # Default labels are truncated to 30 characters to keep the legend compact.
        topic_names = {key: value[:30] + "..." if len(value) > 30 else value
                       for key, value in topic_model.topic_labels_.items()}

    # Build the plotting frame with .assign() so that the "Name" column is
    # added to a new DataFrame and the caller's frame is left untouched.
    data = (topics_over_time
            .loc[topics_over_time.Topic.isin(selected_topics), :]
            .assign(Name=lambda frame: frame.Topic.map(topic_names))
            .sort_values(["Topic", "Timestamp"]))

    # Add one line trace per topic present in the filtered data.
    fig = go.Figure()
    for topic in data.Topic.unique():
        trace_data = data.loc[data.Topic == topic, :]
        topic_name = trace_data.Name.values[0]
        words = trace_data.Words.values
        if normalize_frequency:
            # normalize() expects a 2D array: treat the series as one row.
            y = normalize(trace_data.Frequency.values.reshape(1, -1))[0]
        else:
            y = trace_data.Frequency
        fig.add_trace(go.Scatter(x=pd.to_datetime(trace_data.Timestamp), y=y,
                                 mode='lines',
                                 hoverinfo="text",
                                 name=topic_name,
                                 hovertext=[f'<b>Topic {topic}</b><br>Words: {word}' for word in words]))

    # Styling of the visualization
    fig.update_layout(
        yaxis_title="Normalized Frequency" if normalize_frequency else "Frequency",
        title={'text': f'{title}',
               'font': dict(size=22)
               },
        width=width,
        height=height,
        hoverlabel=dict(
            bgcolor="white",
            font_size=16,
        ),
        # Horizontal legend placed below the plot area.
        legend=dict(
            title="<b>Global Topic Representation",
            orientation="h",
            y=-.2,
            x=0
        )
    )
    return fig
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
+
def visualize_topics_per_class(topic_model,
                               topics_per_class: pd.DataFrame,
                               top_n_topics: int = 10,
                               topics: List[int] = None,
                               normalize_frequency: bool = False,
                               custom_labels: bool = False,
                               title: str = "<b>Topics per Class</b>",
                               width: int = 900,
                               height: int = 900) -> go.Figure:
    """Draw a horizontal bar chart of topic frequency per class.

    Based on BERTopic's function
    https://github.com/MaartenGr/BERTopic/blob/809414b88ca3f12a46728069d098d82345986489/bertopic/plotting/_topics_per_class.py

    Arguments:
        topic_model: Fitted topic model. This function reads
                     ``get_topic_freq()``, ``topic_labels_``,
                     ``custom_labels_`` and ``_outliers`` from it.
        topics_per_class: DataFrame with the columns ``Topic``, ``Class``,
                          ``Words`` and ``Frequency``. It is not modified.
        top_n_topics: If given (and ``topics`` is None), keep only the first
                      ``top_n_topics`` rows of ``get_topic_freq()`` after the
                      outlier topic -1 has been removed, in that order.
        topics: Explicit list of topic ids to plot; takes precedence over
                ``top_n_topics``.
        normalize_frequency: If True, each topic's frequency vector is scaled
                             with ``sklearn.preprocessing.normalize`` (default
                             settings) before plotting.
        custom_labels: If True and the model has ``custom_labels_``, use them
                       as trace names instead of the default topic labels.
        title: Figure title (Plotly pseudo-HTML allowed).
        width: Figure width in pixels.
        height: Figure height in pixels.

    Returns:
        A ``plotly.graph_objects.Figure`` with one ``Bar`` trace per selected
        topic that has rows in ``topics_per_class``. Only the first trace
        drawn is visible initially; the others are "legendonly".
    """
    # Select topics based on top_n and topics args; the outlier topic (-1)
    # is never picked automatically.
    freq_df = topic_model.get_topic_freq()
    freq_df = freq_df.loc[freq_df.Topic != -1, :]
    if topics is not None:
        selected_topics = list(topics)
    elif top_n_topics is not None:
        # Deliberately not sorted: the order of get_topic_freq() is kept so
        # that its first topic becomes the initially visible trace.
        selected_topics = freq_df.Topic.to_list()[:top_n_topics]
    else:
        selected_topics = sorted(freq_df.Topic.to_list())

    # Map topic id -> legend name.
    if topic_model.custom_labels_ is not None and custom_labels:
        # custom_labels_ is positional; _outliers shifts the index past the
        # outlier entry when one exists.
        topic_names = {key: topic_model.custom_labels_[key + topic_model._outliers]
                       for key in topic_model.topic_labels_}
    else:
        # Default labels are truncated to 40 characters to keep the legend compact.
        topic_names = {key: value[:40] + "..." if len(value) > 40 else value
                       for key, value in topic_model.topic_labels_.items()}

    # Build the plotting frame with .assign() so that the "Name" column is
    # added to a new DataFrame and the caller's frame is left untouched.
    data = (topics_per_class
            .loc[topics_per_class.Topic.isin(selected_topics), :]
            .assign(Name=lambda frame: frame.Topic.map(topic_names)))

    # Add one bar trace per selected topic, in selection order.
    fig = go.Figure()
    first_trace = True
    for topic in selected_topics:
        trace_data = data.loc[data.Topic == topic, :]
        if trace_data.empty:
            # A selected topic may have no rows in topics_per_class. There is
            # nothing to draw, and Name.values[0] below would raise IndexError.
            continue

        # Show only the first trace that is actually drawn; the rest can be
        # toggled on from the legend.
        visible = True if first_trace else "legendonly"
        first_trace = False

        topic_name = trace_data.Name.values[0]
        words = trace_data.Words.values
        if normalize_frequency:
            # normalize() expects a 2D array: treat the series as one row.
            x = normalize(trace_data.Frequency.values.reshape(1, -1))[0]
        else:
            x = trace_data.Frequency
        fig.add_trace(go.Bar(y=trace_data.Class,
                             x=x,
                             visible=visible,
                             hoverinfo="text",
                             name=topic_name,
                             orientation="h",
                             hovertext=[f'<b>Topic {topic}</b><br>Words: {word}' for word in words]))

    # Styling of the visualization
    fig.update_xaxes(showgrid=True)
    fig.update_yaxes(showgrid=True)
    fig.update_layout(
        xaxis_title="Normalized Frequency" if normalize_frequency else "Frequency",
        yaxis_title="Class",
        title={
            'text': f"{title}",
            'font': dict(
                size=22)
        },
        width=width,
        height=height,
        hoverlabel=dict(
            bgcolor="white",
            font_size=16,
        ),
        legend=dict(
            title="<b>Global Topic Representation",
        )
    )
    return fig
|