vtrubamacrocosmos committed on
Commit
aa57274
•
1 Parent(s): b020462

Upload 3 files

Browse files

Add visualisation of some data analytics

Files changed (3) hide show
  1. analysis_results.json +763 -0
  2. app.py +126 -21
  3. requirements.txt +2 -0
analysis_results.json ADDED
@@ -0,0 +1,763 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "structure": {
3
+ "total_partitions": 128,
4
+ "total_rows": 127890659,
5
+ "columns": [
6
+ "text",
7
+ "label",
8
+ "dataType",
9
+ "communityName",
10
+ "datetime"
11
+ ],
12
+ "date_range": [
13
+ "2024-07-01",
14
+ "2024-06-30"
15
+ ]
16
+ },
17
+ "communities": [
18
+ {
19
+ "communityName": "r/AskReddit",
20
+ "count": 532457,
21
+ "percentage": 0.4163376779534774
22
+ },
23
+ {
24
+ "communityName": "r/Market76",
25
+ "count": 503023,
26
+ "percentage": 0.39332270545263204
27
+ },
28
+ {
29
+ "communityName": "r/teenagers",
30
+ "count": 471103,
31
+ "percentage": 0.36836388496520295
32
+ },
33
+ {
34
+ "communityName": "r/Eldenring",
35
+ "count": 462147,
36
+ "percentage": 0.36136102793871755
37
+ },
38
+ {
39
+ "communityName": "r/AITAH",
40
+ "count": 443492,
41
+ "percentage": 0.34677434885998987
42
+ },
43
+ {
44
+ "communityName": "r/NoStupidQuestions",
45
+ "count": 440095,
46
+ "percentage": 0.34411817363455766
47
+ },
48
+ {
49
+ "communityName": "r/Monopoly_GO",
50
+ "count": 429916,
51
+ "percentage": 0.33615903097348176
52
+ },
53
+ {
54
+ "communityName": "r/AmItheAsshole",
55
+ "count": 417077,
56
+ "percentage": 0.3261199866051202
57
+ },
58
+ {
59
+ "communityName": "r/facepalm",
60
+ "count": 401747,
61
+ "percentage": 0.3141331846604997
62
+ },
63
+ {
64
+ "communityName": "r/MonopolyGoTrading",
65
+ "count": 381770,
66
+ "percentage": 0.2985128100716097
67
+ },
68
+ {
69
+ "communityName": "r/relationship_advice",
70
+ "count": 360211,
71
+ "percentage": 0.2816554413094392
72
+ },
73
+ {
74
+ "communityName": "r/Helldivers",
75
+ "count": 320273,
76
+ "percentage": 0.25042720281862024
77
+ },
78
+ {
79
+ "communityName": "r/pics",
80
+ "count": 317068,
81
+ "percentage": 0.24792115583672142
82
+ },
83
+ {
84
+ "communityName": "r/mildlyinfuriating",
85
+ "count": 316527,
86
+ "percentage": 0.24749813823384867
87
+ },
88
+ {
89
+ "communityName": "r/cats",
90
+ "count": 300228,
91
+ "percentage": 0.23475365781014546
92
+ },
93
+ {
94
+ "communityName": "r/politics",
95
+ "count": 288332,
96
+ "percentage": 0.22545196205455476
97
+ },
98
+ {
99
+ "communityName": "r/Brawlstars",
100
+ "count": 284389,
101
+ "percentage": 0.22236885963657438
102
+ },
103
+ {
104
+ "communityName": "r/soccer",
105
+ "count": 274877,
106
+ "percentage": 0.21493125623819015
107
+ },
108
+ {
109
+ "communityName": "r/Superstonk",
110
+ "count": 266451,
111
+ "percentage": 0.20834281571729174
112
+ },
113
+ {
114
+ "communityName": "r/pcmasterrace",
115
+ "count": 259127,
116
+ "percentage": 0.20261604876084033
117
+ },
118
+ {
119
+ "communityName": "r/nba",
120
+ "count": 258699,
121
+ "percentage": 0.20228138788463043
122
+ },
123
+ {
124
+ "communityName": "r/interestingasfuck",
125
+ "count": 254769,
126
+ "percentage": 0.19920845039980598
127
+ },
128
+ {
129
+ "communityName": "r/worldnews",
130
+ "count": 253412,
131
+ "percentage": 0.19814738776191623
132
+ },
133
+ {
134
+ "communityName": "r/gaming",
135
+ "count": 252390,
136
+ "percentage": 0.19734826763227487
137
+ },
138
+ {
139
+ "communityName": "r/Christianity",
140
+ "count": 236888,
141
+ "percentage": 0.18522697580282232
142
+ },
143
+ {
144
+ "communityName": "r/movies",
145
+ "count": 229012,
146
+ "percentage": 0.17906859014621232
147
+ },
148
+ {
149
+ "communityName": "r/wallstreetbets",
150
+ "count": 225277,
151
+ "percentage": 0.17614812665872648
152
+ },
153
+ {
154
+ "communityName": "r/HonkaiStarRail",
155
+ "count": 221247,
156
+ "percentage": 0.17299699738039506
157
+ },
158
+ {
159
+ "communityName": "r/Sexting_Adults",
160
+ "count": 216583,
161
+ "percentage": 0.16935013213122938
162
+ },
163
+ {
164
+ "communityName": "r/PatchesEmporium",
165
+ "count": 215685,
166
+ "percentage": 0.16864796982553668
167
+ },
168
+ {
169
+ "communityName": "r/DestinyTheGame",
170
+ "count": 213897,
171
+ "percentage": 0.1672499005576318
172
+ },
173
+ {
174
+ "communityName": "r/mildlyinteresting",
175
+ "count": 212715,
176
+ "percentage": 0.16632567355837927
177
+ },
178
+ {
179
+ "communityName": "r/ffxiv",
180
+ "count": 211391,
181
+ "percentage": 0.1652904142123468
182
+ },
183
+ {
184
+ "communityName": "r/fo76",
185
+ "count": 208255,
186
+ "percentage": 0.16283831956796782
187
+ },
188
+ {
189
+ "communityName": "r/memes",
190
+ "count": 205653,
191
+ "percentage": 0.16080376910091612
192
+ },
193
+ {
194
+ "communityName": "r/PokemonGoRaids",
195
+ "count": 203790,
196
+ "percentage": 0.15934705598788101
197
+ },
198
+ {
199
+ "communityName": "r/GaySnapchatShare",
200
+ "count": 202816,
201
+ "percentage": 0.158585467919123
202
+ },
203
+ {
204
+ "communityName": "r/ask",
205
+ "count": 199458,
206
+ "percentage": 0.1559597874931585
207
+ },
208
+ {
209
+ "communityName": "r/anime",
210
+ "count": 195877,
211
+ "percentage": 0.15315973936767344
212
+ },
213
+ {
214
+ "communityName": "r/AMA",
215
+ "count": 188882,
216
+ "percentage": 0.14769022341185997
217
+ },
218
+ {
219
+ "communityName": "r/TheBoys",
220
+ "count": 187839,
221
+ "percentage": 0.14687468300558212
222
+ },
223
+ {
224
+ "communityName": "r/SquaredCircle",
225
+ "count": 187552,
226
+ "percentage": 0.14665027255821708
227
+ },
228
+ {
229
+ "communityName": "r/hazbin",
230
+ "count": 187090,
231
+ "percentage": 0.14628902647221484
232
+ },
233
+ {
234
+ "communityName": "r/unpopularopinion",
235
+ "count": 186100,
236
+ "percentage": 0.1455149277164957
237
+ },
238
+ {
239
+ "communityName": "r/PersonalizedGameRecs",
240
+ "count": 181432,
241
+ "percentage": 0.14186493479558973
242
+ },
243
+ {
244
+ "communityName": "r/CharacterAI",
245
+ "count": 178629,
246
+ "percentage": 0.139673218823589
247
+ },
248
+ {
249
+ "communityName": "r/ufc",
250
+ "count": 178575,
251
+ "percentage": 0.1396309952550952
252
+ },
253
+ {
254
+ "communityName": "r/HouseOfTheDragon",
255
+ "count": 178312,
256
+ "percentage": 0.13942535083817184
257
+ },
258
+ {
259
+ "communityName": "r/GenZ",
260
+ "count": 176004,
261
+ "percentage": 0.13762068424403068
262
+ },
263
+ {
264
+ "communityName": "r/Damnthatsinteresting",
265
+ "count": 174819,
266
+ "percentage": 0.13669411149097294
267
+ },
268
+ {
269
+ "communityName": "r/neoliberal",
270
+ "count": 173635,
271
+ "percentage": 0.13576832065585023
272
+ },
273
+ {
274
+ "communityName": "r/Genshin_Impact",
275
+ "count": 173527,
276
+ "percentage": 0.13568387351886269
277
+ },
278
+ {
279
+ "communityName": "r/Minecraft",
280
+ "count": 172175,
281
+ "percentage": 0.13462672047064828
282
+ },
283
+ {
284
+ "communityName": "r/WutheringWaves",
285
+ "count": 171039,
286
+ "percentage": 0.13373846169640896
287
+ },
288
+ {
289
+ "communityName": "r/FortNiteBR",
290
+ "count": 170872,
291
+ "percentage": 0.1336078814012523
292
+ },
293
+ {
294
+ "communityName": "r/AskOuija",
295
+ "count": 169284,
296
+ "percentage": 0.1323661957203614
297
+ },
298
+ {
299
+ "communityName": "r/dating",
300
+ "count": 167593,
301
+ "percentage": 0.13104397249215832
302
+ },
303
+ {
304
+ "communityName": "r/leagueoflegends",
305
+ "count": 166086,
306
+ "percentage": 0.1298656221640081
307
+ },
308
+ {
309
+ "communityName": "r/Music",
310
+ "count": 164834,
311
+ "percentage": 0.12888666090930065
312
+ },
313
+ {
314
+ "communityName": "r/BaldursGate3",
315
+ "count": 164621,
316
+ "percentage": 0.12872011238913078
317
+ },
318
+ {
319
+ "communityName": "r/todayilearned",
320
+ "count": 161672,
321
+ "percentage": 0.12641423639860985
322
+ },
323
+ {
324
+ "communityName": "r/TrueOffMyChest",
325
+ "count": 161071,
326
+ "percentage": 0.1259443037196329
327
+ },
328
+ {
329
+ "communityName": "r/dating_advice",
330
+ "count": 156867,
331
+ "percentage": 0.12265712072059931
332
+ },
333
+ {
334
+ "communityName": "r/PowerScaling",
335
+ "count": 156325,
336
+ "percentage": 0.12223332119979145
337
+ },
338
+ {
339
+ "communityName": "r/WhitePeopleTwitter",
340
+ "count": 155658,
341
+ "percentage": 0.12171178193709987
342
+ },
343
+ {
344
+ "communityName": "r/Jujutsufolk",
345
+ "count": 154658,
346
+ "percentage": 0.12092986400203005
347
+ },
348
+ {
349
+ "communityName": "r/namenerds",
350
+ "count": 153511,
351
+ "percentage": 0.12003300413050495
352
+ },
353
+ {
354
+ "communityName": "r/AskPH",
355
+ "count": 150618,
356
+ "percentage": 0.11777091554434793
357
+ },
358
+ {
359
+ "communityName": "r/2007scape",
360
+ "count": 147881,
361
+ "percentage": 0.11563080615606179
362
+ },
363
+ {
364
+ "communityName": "r/travisandtaylor",
365
+ "count": 147873,
366
+ "percentage": 0.11562455081258124
367
+ },
368
+ {
369
+ "communityName": "r/europe",
370
+ "count": 146782,
371
+ "percentage": 0.11477147834542006
372
+ },
373
+ {
374
+ "communityName": "r/StarWars",
375
+ "count": 146625,
376
+ "percentage": 0.1146487172296141
377
+ },
378
+ {
379
+ "communityName": "r/OriginalCharacter",
380
+ "count": 146583,
381
+ "percentage": 0.11461587667634117
382
+ },
383
+ {
384
+ "communityName": "r/Destiny",
385
+ "count": 145557,
386
+ "percentage": 0.11381362887495951
387
+ },
388
+ {
389
+ "communityName": "r/BeyondTheFog",
390
+ "count": 145182,
391
+ "percentage": 0.11352040964930832
392
+ },
393
+ {
394
+ "communityName": "r/indiasocial",
395
+ "count": 143616,
396
+ "percentage": 0.11229592616298896
397
+ },
398
+ {
399
+ "communityName": "r/formula1",
400
+ "count": 142147,
401
+ "percentage": 0.11114728871637139
402
+ },
403
+ {
404
+ "communityName": "r/golf",
405
+ "count": 141326,
406
+ "percentage": 0.11050533409167904
407
+ },
408
+ {
409
+ "communityName": "r/Warframe",
410
+ "count": 141053,
411
+ "percentage": 0.11029187049540498
412
+ },
413
+ {
414
+ "communityName": "r/technology",
415
+ "count": 140680,
416
+ "percentage": 0.11000021510562394
417
+ },
418
+ {
419
+ "communityName": "r/Overwatch",
420
+ "count": 140479,
421
+ "percentage": 0.1098430496006749
422
+ },
423
+ {
424
+ "communityName": "r/OnePiece",
425
+ "count": 140096,
426
+ "percentage": 0.10954357503154316
427
+ },
428
+ {
429
+ "communityName": "r/RandomThoughts",
430
+ "count": 139372,
431
+ "percentage": 0.1089774664465526
432
+ },
433
+ {
434
+ "communityName": "r/DnD",
435
+ "count": 138994,
436
+ "percentage": 0.1086819014670962
437
+ },
438
+ {
439
+ "communityName": "r/motorcycles",
440
+ "count": 135876,
441
+ "percentage": 0.10624388134554846
442
+ },
443
+ {
444
+ "communityName": "r/BeAmazed",
445
+ "count": 133909,
446
+ "percentage": 0.10470584876726612
447
+ },
448
+ {
449
+ "communityName": "r/destiny2",
450
+ "count": 132612,
451
+ "percentage": 0.10369170120548053
452
+ },
453
+ {
454
+ "communityName": "r/questions",
455
+ "count": 132387,
456
+ "percentage": 0.10351576967008982
457
+ },
458
+ {
459
+ "communityName": "r/TeenagersButBetter",
460
+ "count": 131124,
461
+ "percentage": 0.10252820731809663
462
+ },
463
+ {
464
+ "communityName": "r/funny",
465
+ "count": 130574,
466
+ "percentage": 0.10209815245380822
467
+ },
468
+ {
469
+ "communityName": "r/Funnymemes",
470
+ "count": 130289,
471
+ "percentage": 0.10187530584231332
472
+ },
473
+ {
474
+ "communityName": "r/buildapc",
475
+ "count": 129266,
476
+ "percentage": 0.10107540379473688
477
+ },
478
+ {
479
+ "communityName": "r/horror",
480
+ "count": 129073,
481
+ "percentage": 0.1009244936332684
482
+ },
483
+ {
484
+ "communityName": "r/196",
485
+ "count": 128914,
486
+ "percentage": 0.1008001686815923
487
+ },
488
+ {
489
+ "communityName": "r/Fallout",
490
+ "count": 128174,
491
+ "percentage": 0.10022154940964062
492
+ },
493
+ {
494
+ "communityName": "r/Philippines",
495
+ "count": 127755,
496
+ "percentage": 0.09989392579484638
497
+ },
498
+ {
499
+ "communityName": "r/AskUK",
500
+ "count": 127016,
501
+ "percentage": 0.09931608844082976
502
+ },
503
+ {
504
+ "communityName": "r/MadeMeSmile",
505
+ "count": 126297,
506
+ "percentage": 0.09875388944551454
507
+ },
508
+ {
509
+ "communityName": "r/Warthunder",
510
+ "count": 125899,
511
+ "percentage": 0.09844268610735675
512
+ },
513
+ {
514
+ "communityName": "r/Parenting",
515
+ "count": 124075,
516
+ "percentage": 0.09701646779378938
517
+ }
518
+ ],
519
+ "time_distribution": [
520
+ {
521
+ "date": "2024-06-08",
522
+ "count": 210037,
523
+ "percentage": 0.16423169732826226
524
+ },
525
+ {
526
+ "date": "2024-06-09",
527
+ "count": 4481970,
528
+ "percentage": 3.504532727444934
529
+ },
530
+ {
531
+ "date": "2024-06-10",
532
+ "count": 4852878,
533
+ "percentage": 3.7945523449058154
534
+ },
535
+ {
536
+ "date": "2024-06-11",
537
+ "count": 4709348,
538
+ "percentage": 3.682323663685242
539
+ },
540
+ {
541
+ "date": "2024-06-12",
542
+ "count": 4565568,
543
+ "percentage": 3.569899502980902
544
+ },
545
+ {
546
+ "date": "2024-06-13",
547
+ "count": 4529459,
548
+ "percentage": 3.541665228263465
549
+ },
550
+ {
551
+ "date": "2024-06-14",
552
+ "count": 4543307,
553
+ "percentage": 3.552493227828312
554
+ },
555
+ {
556
+ "date": "2024-06-15",
557
+ "count": 3927872,
558
+ "percentage": 3.0712735634586106
559
+ },
560
+ {
561
+ "date": "2024-06-16",
562
+ "count": 4163320,
563
+ "percentage": 3.255374577434932
564
+ },
565
+ {
566
+ "date": "2024-06-17",
567
+ "count": 4389056,
568
+ "percentage": 3.4318816044258558
569
+ },
570
+ {
571
+ "date": "2024-06-18",
572
+ "count": 2084785,
573
+ "percentage": 1.6301307822645592
574
+ },
575
+ {
576
+ "date": "2024-06-19",
577
+ "count": 1265032,
578
+ "percentage": 0.9891512092372594
579
+ },
580
+ {
581
+ "date": "2024-06-20",
582
+ "count": 4229784,
583
+ "percentage": 3.307343971071414
584
+ },
585
+ {
586
+ "date": "2024-06-21",
587
+ "count": 4475084,
588
+ "percentage": 3.4991484405440434
589
+ },
590
+ {
591
+ "date": "2024-06-22",
592
+ "count": 4142584,
593
+ "percentage": 3.2391607271333243
594
+ },
595
+ {
596
+ "date": "2024-06-23",
597
+ "count": 4266610,
598
+ "percentage": 3.3361388809482952
599
+ },
600
+ {
601
+ "date": "2024-06-24",
602
+ "count": 4447595,
603
+ "percentage": 3.4776542984269083
604
+ },
605
+ {
606
+ "date": "2024-06-25",
607
+ "count": 4578862,
608
+ "percentage": 3.58029432000972
609
+ },
610
+ {
611
+ "date": "2024-06-26",
612
+ "count": 4702600,
613
+ "percentage": 3.6770472814593913
614
+ },
615
+ {
616
+ "date": "2024-06-27",
617
+ "count": 4763948,
618
+ "percentage": 3.725016382940055
619
+ },
620
+ {
621
+ "date": "2024-06-28",
622
+ "count": 4583378,
623
+ "percentage": 3.5838254614044955
624
+ },
625
+ {
626
+ "date": "2024-06-29",
627
+ "count": 4356142,
628
+ "percentage": 3.406145557510967
629
+ },
630
+ {
631
+ "date": "2024-06-30",
632
+ "count": 4375560,
633
+ "percentage": 3.4213288399741533
634
+ },
635
+ {
636
+ "date": "2024-07-01",
637
+ "count": 4632358,
638
+ "percentage": 3.622123801864216
639
+ },
640
+ {
641
+ "date": "2024-07-02",
642
+ "count": 3835794,
643
+ "percentage": 2.9992761238332504
644
+ },
645
+ {
646
+ "date": "2024-07-03",
647
+ "count": 4245640,
648
+ "percentage": 3.3197420618498805
649
+ },
650
+ {
651
+ "date": "2024-07-04",
652
+ "count": 4076776,
653
+ "percentage": 3.1877042716622483
654
+ },
655
+ {
656
+ "date": "2024-07-05",
657
+ "count": 4013996,
658
+ "percentage": 3.1386154636985646
659
+ },
660
+ {
661
+ "date": "2024-07-06",
662
+ "count": 3939363,
663
+ "percentage": 3.0802585824504978
664
+ },
665
+ {
666
+ "date": "2024-07-07",
667
+ "count": 3994586,
668
+ "percentage": 3.123438436578859
669
+ },
670
+ {
671
+ "date": "2024-07-08",
672
+ "count": 4251041,
673
+ "percentage": 3.3239652006171925
674
+ },
675
+ {
676
+ "date": "2024-07-09",
677
+ "count": 2256326,
678
+ "percentage": 1.7642617667643734
679
+ }
680
+ ],
681
+ "top_topics": [
682
+ {
683
+ "topic_id": 1,
684
+ "words": "time just work don know years ve want year did",
685
+ "weight": 12.889261368556559
686
+ },
687
+ {
688
+ "topic_id": 2,
689
+ "words": "don just know like good think really thank thanks people",
690
+ "weight": 9.700709430574248
691
+ },
692
+ {
693
+ "topic_id": 3,
694
+ "words": "yes lol man je en bro la dm oh beautiful",
695
+ "weight": 2.726046094531505
696
+ },
697
+ {
698
+ "topic_id": 4,
699
+ "words": "reddit https com message post subreddit www questions compose bot",
700
+ "weight": 10.8575941432982
701
+ },
702
+ {
703
+ "topic_id": 5,
704
+ "words": "game just use play need don games new like using",
705
+ "weight": 11.02821702932872
706
+ },
707
+ {
708
+ "topic_id": 6,
709
+ "words": "people just like don think way world life doesn right",
710
+ "weight": 14.95772111405933
711
+ },
712
+ {
713
+ "topic_id": 7,
714
+ "words": "https format auto redd width preview webp pjpg que jpeg",
715
+ "weight": 3.121675100366933
716
+ },
717
+ {
718
+ "topic_id": 8,
719
+ "words": "https com www 10 youtube 2024 watch price org 20",
720
+ "weight": 6.260670568016835
721
+ },
722
+ {
723
+ "topic_id": 9,
724
+ "words": "like just love got really ve looks good think time",
725
+ "weight": 12.89084891866437
726
+ },
727
+ {
728
+ "topic_id": 10,
729
+ "words": "just like time don ve day good car make going",
730
+ "weight": 15.567256232603285
731
+ }
732
+ ],
733
+ "sentiment_distribution": [
734
+ {
735
+ "sentiment": "Neutral",
736
+ "count": 532871,
737
+ "percentage": 53.28832563148952
738
+ },
739
+ {
740
+ "sentiment": "Positive",
741
+ "count": 356163,
742
+ "percentage": 35.61711919374145
743
+ },
744
+ {
745
+ "sentiment": "Negative",
746
+ "count": 110943,
747
+ "percentage": 11.094555174769019
748
+ }
749
+ ],
750
+ "data_type_distribution": [
751
+ {
752
+ "dataType": "comment",
753
+ "count": 115545371,
754
+ "percentage": 90.34699789919762
755
+ },
756
+ {
757
+ "dataType": "post",
758
+ "count": 12345288,
759
+ "percentage": 9.653002100802373
760
+ }
761
+ ],
762
+ "avg_text_length": 195.0891413969491
763
+ }
app.py CHANGED
@@ -1,43 +1,63 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
 
3
 
4
  # Set page config
5
- st.set_page_config(layout="wide", page_title="Macrocosmos HF Dataset Explorer")
6
 
7
- # Custom CSS
8
  st.markdown("""
9
  <style>
10
  .stApp {
11
  max-width: 1200px;
12
  margin: 0 auto;
 
13
  }
14
- .st-emotion-cache-1r6slb0 {
15
- background-color: #f0f2f6;
16
  border-radius: 10px;
17
  padding: 20px;
18
- margin-bottom: 20px;
 
 
 
19
  }
20
- .st-emotion-cache-1wivap2 {
21
- background-color: #ffffff;
22
- border-radius: 10px;
23
- padding: 20px;
24
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
25
  }
26
- .company-logo {
27
- max-width: 200px;
 
28
  margin-bottom: 20px;
29
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  </style>
31
  """, unsafe_allow_html=True)
32
 
33
- # Company logo
34
- st.image("macrocosmos-black.png", use_column_width=False, width=200)
35
-
36
  # Title and description
37
- st.title("🤗 Hugging Face Large Dataset Explorer")
38
  st.markdown("Explore massive datasets hosted on Hugging Face, totaling approximately 100GB of data.")
39
 
40
  # Function to load dataset information
 
41
  def load_datasets():
42
  return [
43
  {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/icedwind/x_dataset_19", "Number of rows": "332 MLN"},
@@ -57,12 +77,14 @@ datasets = load_datasets()
57
  df = pd.DataFrame(datasets)
58
 
59
  # Display statistics
60
- col1, col2 = st.columns(2)
61
  with col1:
62
  total_rows = sum(float(str(rows).split()[0].replace(',', '')) for rows in df['Number of rows'])
63
- st.metric("Total Rows", f"{total_rows:.2f} Million")
64
  with col2:
65
  st.metric("Total Datasets", len(df))
 
 
66
 
67
  # Display the dataset table
68
  st.subheader("Dataset Overview")
@@ -77,8 +99,91 @@ st.dataframe(
77
  use_container_width=True
78
  )
79
 
80
- # Add a note about the size of the datasets
81
- st.info("⚠️ These datasets are very large, totaling approximately 100GB. They are not available for direct download through this interface. Please visit the individual dataset links for more information on accessing the data.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  # Add instructions for using the datasets
84
  st.subheader("How to Use These Datasets")
@@ -99,4 +204,4 @@ st.markdown("""
99
 
100
  # Footer
101
  st.markdown("---")
102
- st.markdown("Created by Macrocosmos with ❤️ ")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import json
4
+ import plotly.graph_objects as go
5
+ from plotly.subplots import make_subplots
6
 
7
  # Set page config
8
+ st.set_page_config(layout="wide", page_title="Macrocosmos HF Dataset Explorer", page_icon="🌌")
9
 
10
+ # Custom CSS (keep existing styles)
11
  st.markdown("""
12
  <style>
13
  .stApp {
14
  max-width: 1200px;
15
  margin: 0 auto;
16
+ font-family: 'Helvetica Neue', Arial, sans-serif;
17
  }
18
+ .cta-container {
19
+ background-color: #f0f8ff;
20
  border-radius: 10px;
21
  padding: 20px;
22
+ margin-top: 30px;
23
+ margin-bottom: 30px;
24
+ border: 2px solid #1e90ff;
25
+ text-align: center;
26
  }
27
+ .cta-title {
28
+ color: #1e90ff;
29
+ font-size: 24px;
30
+ font-weight: bold;
31
+ margin-bottom: 10px;
32
  }
33
+ .cta-description {
34
+ color: #333;
35
+ font-size: 16px;
36
  margin-bottom: 20px;
37
  }
38
+ .stButton > button {
39
+ background-color: #1e90ff;
40
+ color: white;
41
+ font-size: 18px;
42
+ font-weight: bold;
43
+ padding: 10px 24px;
44
+ border-radius: 5px;
45
+ border: none;
46
+ transition: all 0.3s ease;
47
+ }
48
+ .stButton > button:hover {
49
+ background-color: #0066cc;
50
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
51
+ }
52
  </style>
53
  """, unsafe_allow_html=True)
54
 
 
 
 
55
  # Title and description
56
+ st.title("🌌 Macrocosmos HF Dataset Explorer")
57
  st.markdown("Explore massive datasets hosted on Hugging Face, totaling approximately 100GB of data.")
58
 
59
  # Function to load dataset information
60
+ @st.cache_data
61
  def load_datasets():
62
  return [
63
  {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/icedwind/x_dataset_19", "Number of rows": "332 MLN"},
 
77
  df = pd.DataFrame(datasets)
78
 
79
  # Display statistics
80
+ col1, col2, col3 = st.columns(3)
81
  with col1:
82
  total_rows = sum(float(str(rows).split()[0].replace(',', '')) for rows in df['Number of rows'])
83
+ st.metric("Total Rows", f"{total_rows:.2f}M")
84
  with col2:
85
  st.metric("Total Datasets", len(df))
86
+ with col3:
87
+ st.metric("Total Data Size", "100GB+")
88
 
89
  # Display the dataset table
90
  st.subheader("Dataset Overview")
 
99
  use_container_width=True
100
  )
101
 
102
+ # Call-to-action section with styled button
103
+ st.markdown("""
104
+ <div class="cta-container">
105
+ <div class="cta-title">🔍 Explore Dataset Insights</div>
106
+ <div class="cta-description">
107
+ Dive deep into the rich analytics of our dataset. Uncover trends, distributions, and key metrics that will enhance your understanding and guide your research.
108
+ </div>
109
+ </div>
110
+ """, unsafe_allow_html=True)
111
+
112
+ # Centered button
113
+ col1, col2, col3 = st.columns([1,2,1])
114
+ with col2:
115
+ show_analysis = st.button("Reveal Dataset Analysis", use_container_width=True)
116
+
117
+ # Display dataset analysis if the button was clicked
118
if show_analysis:
    # Load analysis results
    @st.cache_data
    def load_analysis_results():
        """Read and parse ``analysis_results.json`` from the working directory.

        Returns:
            The decoded JSON document.

        Raises:
            FileNotFoundError: If ``analysis_results.json`` is missing.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # Explicit encoding so the result does not depend on the host locale.
        with open('analysis_results.json', 'r', encoding='utf-8') as f:
            return json.load(f)

    analysis_results = load_analysis_results()

    st.subheader("Analysis of a Sample Reddit Dataset")
    st.write("This analysis is based on a sample from one of the Reddit datasets.")

    # Display Dataset Structure
    st.subheader("Dataset Structure")
    structure = analysis_results['structure']
    # ISO-8601 date strings sort chronologically; sorting guards against the
    # range being stored in descending order, which would otherwise render as
    # "<later date> to <earlier date>".
    ordered_dates = sorted(structure['date_range'])
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Partitions", structure['total_partitions'])
    col2.metric("Total Rows", f"{structure['total_rows']:,}")
    col3.metric("Number of Columns", len(structure['columns']))
    col4.metric("Date Range", f"{ordered_dates[0]} to {ordered_dates[-1]}")

    with st.expander("Show Columns"):
        st.write(", ".join(structure['columns']))

    # Display Top Communities
    st.subheader("Top Communities")
    communities_df = pd.DataFrame(analysis_results['communities'])
    fig = go.Figure(data=[go.Bar(
        x=communities_df['communityName'],
        y=communities_df['count'],
        # 'percentage' is already expressed in percent units, so append a
        # literal '%' rather than using the '%' format type, which would
        # multiply the value by 100 a second time.
        text=communities_df['percentage'].apply(lambda x: f'{x:.2f}%'),
        textposition='auto',
        marker_color='#1e88e5'
    )])
    fig.update_layout(title_text='Top Communities Distribution')
    fig.update_traces(marker_line_color='rgb(8,48,107)', marker_line_width=1.5, opacity=0.6)
    st.plotly_chart(fig, use_container_width=True)

    # Display Time Distribution
    st.subheader("Time Distribution")
    time_df = pd.DataFrame(analysis_results['time_distribution'])
    time_df['date'] = pd.to_datetime(time_df['date'])
    fig = go.Figure(data=[go.Scatter(x=time_df['date'], y=time_df['count'], mode='lines+markers')])
    fig.update_layout(title_text='Posts Over Time')
    st.plotly_chart(fig, use_container_width=True)

    # Display Sentiment Distribution
    st.subheader("Sentiment Distribution")
    sentiment_df = pd.DataFrame(analysis_results['sentiment_distribution'])
    # Colour each slice by its label instead of by row position, so the
    # green/amber/red palette stays attached to Positive/Neutral/Negative
    # whatever order the rows arrive in. Unknown labels fall back to grey.
    sentiment_palette = {
        'Positive': '#4CAF50',
        'Neutral': '#FFC107',
        'Negative': '#F44336',
    }
    sentiment_colors = [sentiment_palette.get(label, '#9E9E9E') for label in sentiment_df['sentiment']]
    fig = go.Figure(data=[go.Pie(labels=sentiment_df['sentiment'], values=sentiment_df['count'], textinfo='percent+label')])
    fig.update_layout(title_text='Sentiment Distribution')
    fig.update_traces(marker=dict(colors=sentiment_colors))
    st.plotly_chart(fig, use_container_width=True)

    # Display Data Type Distribution
    st.subheader("Data Type Distribution")
    data_type_df = pd.DataFrame(analysis_results['data_type_distribution'])
    fig = go.Figure(data=[go.Pie(labels=data_type_df['dataType'], values=data_type_df['count'], textinfo='percent+label')])
    fig.update_layout(title_text='Data Type Distribution')
    fig.update_traces(marker=dict(colors=['#2196F3', '#FF9800']))
    st.plotly_chart(fig, use_container_width=True)

    # Display Top Topics
    st.subheader("Top Topics")
    topics_df = pd.DataFrame(analysis_results['top_topics'])
    st.dataframe(topics_df, use_container_width=True)

    # Display Average Text Length
    st.metric("Average Text Length", f"{analysis_results['avg_text_length']:.2f} characters")
187
 
188
  # Add instructions for using the datasets
189
  st.subheader("How to Use These Datasets")
 
204
 
205
  # Footer
206
  st.markdown("---")
207
+ st.markdown("Created by Macrocosmos with ❤️")
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  pandas
2
  streamlit
3
  streamlit-card
 
 
 
1
  pandas
2
  streamlit
3
  streamlit-card
4
+ plotly
5
+ streamlit-plotly-events