File size: 69,092 Bytes
35f521b
 
d970538
31a0966
 
 
d970538
14eda64
d970538
 
31a0966
d970538
14eda64
31a0966
41fc6d2
b6f295f
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d6249c
b6f295f
7d6249c
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d6249c
b6f295f
7d6249c
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf6c642
b6f295f
 
7d6249c
 
 
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
bf6c642
b6f295f
 
7d6249c
 
 
b6f295f
 
 
 
 
 
 
 
bf6c642
b6f295f
 
7d6249c
 
 
b6f295f
7d6249c
b6f295f
 
7d6249c
 
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d6249c
 
b6f295f
 
 
 
ce11baa
b6f295f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf6c642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6f295f
 
 
d970538
 
 
 
 
 
 
 
 
 
 
 
2828463
 
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c00a0d
21c7fed
9c00a0d
 
21c7fed
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a99ff4
d970538
6a99ff4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21c7fed
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a99ff4
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2828463
d970538
2828463
 
 
 
 
 
d970538
 
 
2828463
d970538
 
 
 
2828463
 
d970538
2828463
d970538
2828463
 
 
 
d970538
 
 
 
 
2828463
d970538
 
 
 
 
2828463
d970538
 
 
 
2828463
 
d970538
 
2828463
d970538
2828463
 
d970538
 
2828463
 
 
d970538
2828463
 
 
 
 
 
d970538
 
 
2828463
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c005b55
 
 
 
d970538
 
 
 
 
 
 
 
2828463
 
 
d970538
 
 
 
 
 
2828463
d970538
2828463
 
 
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c005b55
d970538
 
 
 
 
 
 
 
 
ce11baa
 
d970538
 
 
 
 
 
 
ce11baa
 
 
 
d970538
 
ce11baa
d970538
ce11baa
d970538
 
 
 
 
 
c005b55
d970538
c005b55
d970538
 
 
 
 
 
 
 
c005b55
 
 
 
 
d970538
 
c005b55
 
 
 
d970538
 
 
ce11baa
d970538
ce11baa
d970538
ce11baa
d970538
 
ce11baa
d970538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce11baa
d970538
 
ce11baa
d970538
ce11baa
d970538
ce11baa
d970538
 
ce11baa
d970538
ce11baa
d970538
ce11baa
d970538
 
ce11baa
d970538
ce11baa
d970538
ce11baa
d970538
 
c005b55
ce11baa
d970538
 
 
 
 
31a0966
 
d970538
 
 
 
 
31a0966
 
 
 
 
 
 
 
 
 
d970538
31a0966
 
 
 
 
 
 
bf6c642
31a0966
 
 
 
 
 
d970538
ab3cab2
d970538
 
 
 
 
 
 
 
3747a5f
 
 
 
 
 
 
d970538
 
3747a5f
 
 
d970538
 
 
 
 
 
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d970538
31a0966
 
 
 
d970538
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3747a5f
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d970538
31a0966
b6f295f
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf6c642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31a0966
 
 
bf6c642
 
 
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d970538
31a0966
 
 
98ebb33
c005b55
ab3cab2
d970538
5d9ad4f
 
 
 
 
d970538
31a0966
 
 
d970538
 
 
 
31a0966
 
 
d970538
 
 
 
 
 
31a0966
 
 
d970538
 
 
 
31a0966
 
 
d970538
 
31a0966
d970538
 
 
 
31a0966
 
 
 
 
 
 
d970538
 
 
31a0966
 
d970538
31a0966
d970538
31a0966
 
 
 
 
 
 
d970538
5d9ad4f
 
98ebb33
2828463
5d9ad4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21c7fed
31a0966
 
d970538
31a0966
 
 
 
 
d970538
31a0966
 
3371ce8
21c7fed
 
31a0966
 
 
 
 
 
 
3371ce8
31a0966
 
 
 
ab3cab2
 
3371ce8
 
 
ab3cab2
 
3371ce8
 
 
ab3cab2
 
3371ce8
 
 
31a0966
 
 
 
 
 
 
 
 
5d9ad4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31a0966
 
 
 
 
 
 
3747a5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31a0966
 
 
 
 
 
 
 
 
 
 
 
 
d970538
 
31a0966
2828463
 
 
 
c005b55
2828463
 
 
31a0966
 
d970538
31a0966
d970538
 
 
 
 
 
 
 
 
31a0966
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
import os
os.environ["STREAMLIT_GLOBAL_CONFIG"] = "/data/.streamlit/config.toml"
import time
import uuid
import random
import urllib.parse # To parse URL parameters
from functools import lru_cache
import streamlit as st
import numpy as np
import pandas as pd
import duckdb
import hdbscan

# Database file path
DB_PATH = '/data/steampolis.duckdb'
DEFAULT_BASE_URL = 'https://huggingface.co/spaces/npc0/SteamPolis/'

# Initialize database tables if they don't exist
def initialize_database():
    try:
        init_con = duckdb.connect(database=DB_PATH, read_only=False)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS topics (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                description TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS users (
                id TEXT PRIMARY KEY,
                username TEXT NOT NULL UNIQUE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS comments (
                id TEXT PRIMARY KEY,
                topic_id TEXT NOT NULL,
                user_id TEXT NOT NULL,
                content TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (topic_id) REFERENCES topics(id),
                FOREIGN KEY (user_id) REFERENCES users(id)
            )
        """)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS votes (
                id TEXT PRIMARY KEY,
                user_id TEXT NOT NULL,
                comment_id TEXT NOT NULL,
                vote_type TEXT NOT NULL CHECK (vote_type IN ('agree', 'disagree', 'neutral')),
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id),
                FOREIGN KEY (comment_id) REFERENCES comments(id),
                UNIQUE (user_id, comment_id)
            )
        """)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS user_comment_collections (
                id TEXT PRIMARY KEY,
                user_id TEXT NOT NULL,
                comment_id TEXT NOT NULL,
                collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id),
                FOREIGN KEY (comment_id) REFERENCES comments(id),
                UNIQUE (user_id, comment_id)
            )
        """)
        init_con.execute("""
            CREATE TABLE IF NOT EXISTS user_progress (
                id TEXT PRIMARY KEY,
                user_id TEXT NOT NULL,
                topic_id TEXT NOT NULL,
                last_comment_id_viewed TEXT,
                last_viewed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id),
                FOREIGN KEY (topic_id) REFERENCES topics(id),
                FOREIGN KEY (last_comment_id_viewed) REFERENCES comments(id),
                UNIQUE (user_id, topic_id)
            )
        """)

        # Create system user if it doesn't exist
        try:
            init_con.execute("""
                INSERT INTO users (id, username)
                VALUES ('system', 'System')
                ON CONFLICT (id) DO NOTHING
            """)
        except Exception as e:
            print(f"Warning: Could not create system user: {e}")

    except Exception as e:
        st.error(f"Database initialization failed: {e}")
    finally:
        if 'init_con' in locals() and init_con:
            init_con.close()


def add_example_topic(topic_id, topic_title, topic_description, comments_list):
    """
    Adds an example topic, its comments, and example cluster votes to the database.

    Args:
        topic_id (str): The unique ID for the topic.
        topic_title (str): The title of the topic.
        topic_description (str): The description of the topic.
        comments_list (list): A list of strings, where each string is a comment.
    """
    con = None
    try:
        con = duckdb.connect(database=DB_PATH)

        # Insert the topic - Corrected 'title' to 'name' based on schema
        con.execute("""
            INSERT INTO topics (id, name, description)
            VALUES (?, ?, ?)
            ON CONFLICT (id) DO NOTHING
        """, [topic_id, topic_title, topic_description])

        # --- Add Cluster Users ---
        # Create users who will cast votes, separate from comment authors
        num_users_per_cluster = 5
        cluster1_users = [] # e.g., Pro-Tech supporters
        cluster2_users = [] # e.g., Anti-Tech skeptics
        cluster3_users = [] # e.g., Mixed/Neutral voters

        all_cluster_users = []

        for i in range(num_users_per_cluster):
            user_id = str(uuid.uuid4())
            username = f"cluster1_user_{i+1}_{uuid.uuid4().hex[:4]}@example.com"
            con.execute("INSERT INTO users (id, username) VALUES (?, ?) ON CONFLICT (id) DO NOTHING", [user_id, username])
            cluster1_users.append(user_id)
            all_cluster_users.append(user_id)

            user_id = str(uuid.uuid4())
            username = f"cluster2_user_{i+1}_{uuid.uuid4().hex[:4]}@example.com"
            con.execute("INSERT INTO users (id, username) VALUES (?, ?) ON CONFLICT (id) DO NOTHING", [user_id, username])
            cluster2_users.append(user_id)
            all_cluster_users.append(user_id)

            user_id = str(uuid.uuid4())
            username = f"cluster3_user_{i+1}_{uuid.uuid4().hex[:4]}@example.com"
            con.execute("INSERT INTO users (id, username) VALUES (?, ?) ON CONFLICT (id) DO NOTHING", [user_id, username])
            cluster3_users.append(user_id)
            all_cluster_users.append(user_id)


        # --- Insert comments and associated users ---
        comment_id_map = {} # Map comment text to comment ID
        for comment_text in comments_list:
            comment_id = str(uuid.uuid4())
            # Generate a random user ID and username for the comment author
            author_user_id = str(uuid.uuid4())
            author_username = f"author_{uuid.uuid4().hex[:8]}@example.com"

            # Insert the author user
            con.execute("""
                INSERT INTO users (id, username)
                VALUES (?, ?)
                ON CONFLICT (id) DO NOTHING
            """, [author_user_id, author_username])

            # Insert the comment - Corrected 'text' to 'content' based on schema
            con.execute("""
                INSERT INTO comments (id, topic_id, user_id, content)
                VALUES (?, ?, ?, ?)
            """, [comment_id, topic_id, author_user_id, comment_text])
            comment_id_map[comment_text] = comment_id # Store the mapping

        # --- Add Cluster Votes ---
        # Define comment categories based on the example topic context (Civic Tech Initiative)
        # This is hardcoded based on the context provided in the prompt
        pro_tech_comments = [
            "Finally! A system to track rebel scum more efficiently. This will be a glorious day for the Empire!",
            "Anything that improves the speed of selling junk is good in my book. Maybe I can finally get a decent price for this thermal detonator...",
            "Fascinating! I am programmed to be compliant. I shall analyze this initiative and report my findings to the Emperor.",
            "This is a welcome step towards greater efficiency and transparency... cough... as long as it doesn't affect my personal interests.",
            "As long as it helps me track down my targets, I'm in. The more data, the better.",
            "The Emperor's vision is one of unparalleled order and prosperity! This initiative will usher in a new era of galactic harmony!",
            "I'm interested... Will it help me collect debts more efficiently?",
            "If it improves the entertainment options on Coruscant, I'm all for it.",
            "Another set of orders. Understood, sir!",
            "Excellent... with this, I will have even greater control over the galaxy... cackles maniacally"
        ]
        anti_tech_comments = [
            "This is clearly a data-mining operation. They're going to use it to crush the Rebellion. We need to sabotage it!",
            "The Force guides us to see through their deception. This 'civic tech' will only serve to tighten their grip on the galaxy.",
            "I just want a reliable power converter. Is that too much to ask? This 'civic tech' sounds like more bureaucracy.",
            "I'm already dreading the help desk calls. 'My Death Star won't fire!' 'The Force isn't working!'",
            "This is just a fancy way to track our X-wings. We'll find a way to disable it, just like we did with the Death Star.",
            "Another reason to drown my sorrows in a Jawa Juice. This whole thing stinks of the Empire's incompetence.",
            "This initiative is a waste of resources. We should be focusing on military expansion, not 'civic engagement.'"
        ]
        # Comments not in pro or anti lists are considered neutral/other for this example

        votes_to_insert = []

        # Cluster 1 (Pro-Tech) votes: Agree with pro, Disagree with anti, Mixed/Neutral on others
        for user_id in cluster1_users:
            for comment_text in comments_list:
                comment_id = comment_id_map.get(comment_text)
                if not comment_id: continue

                vote_type = None
                if comment_text in pro_tech_comments:
                    vote_type = 'agree'
                elif comment_text in anti_tech_comments:
                    vote_type = 'disagree'
                else:
                    vote_type = 'neutral'

                if vote_type:
                     # Generate UUID for vote ID and append to list
                     vote_id = str(uuid.uuid4())
                     votes_to_insert.append((vote_id, user_id, comment_id, vote_type))


        # Cluster 2 (Anti-Tech) votes: Disagree with pro, Agree with anti, Mixed/Neutral on others
        for user_id in cluster2_users:
             for comment_text in comments_list:
                comment_id = comment_id_map.get(comment_text)
                if not comment_id: continue

                vote_type = None
                if comment_text in pro_tech_comments:
                    vote_type = 'disagree'
                elif comment_text in anti_tech_comments:
                    vote_type = 'agree'
                else:
                    vote_type = 'neutral'

                if vote_type:
                     # Generate UUID for vote ID and append to list
                     vote_id = str(uuid.uuid4())
                     votes_to_insert.append((vote_id, user_id, comment_id, vote_type))

        # Cluster 3 (Mixed/Neutral) votes: Mostly neutral, some random agree/disagree, many skipped
        for user_id in cluster3_users:
             for comment_text in comments_list:
                comment_id = comment_id_map.get(comment_text)
                if not comment_id: continue

                # Mostly neutral, some random agree/disagree, many skipped
                vote_type = random.choice(['neutral'] * 8 + ['agree', 'disagree'])

                if vote_type:
                     # Generate UUID for vote ID and append to list
                     vote_id = str(uuid.uuid4())
                     votes_to_insert.append((vote_id, user_id, comment_id, vote_type))

        # Insert all collected votes - Added 'id' to the insert statement
        if votes_to_insert:
             con.executemany("""
                 INSERT INTO votes (id, user_id, comment_id, vote_type)
                 VALUES (?, ?, ?, ?)
                 ON CONFLICT (user_id, comment_id) DO NOTHING
             """, votes_to_insert)


        con.commit()
        # print(f"Successfully added topic '{topic_title}', {len(comments_list)} comments, and {len(all_cluster_users)} cluster users with votes.") # Use print for console output
        # st.success(f"Successfully added topic '{topic_title}', {len(comments_list)} comments, and {len(all_cluster_users)} cluster users with votes.") # Use st.success if in Streamlit context

    except Exception as e:
        if con:
            con.rollback()
        print(f"Error adding example topic '{topic_title}' and votes: {e}") # Use print for console output
        # st.error(f"Error adding example topic '{topic_title}' and votes: {e}") # Use st.error if in Streamlit context
    finally:
        if con:
            con.close()


# Example usage (can be called elsewhere, e.g., in an initialization script)
def add_dummy_topic():
    example_topic_id = "15736626"
    example_topic_title = "New Civic Tech Initiative"
    example_topic_description = "Seeker, your mission is to assess the true sentiment regarding the Emperor's new 'Civic Tech' initiative. While officially presented as a means for streamlined governance, enhanced citizen engagement, and a more user-friendly experience, the Emperor requires a candid report on public perception. Gather intelligence on whether citizens view this as a path to order or harbor suspicions of a darker purpose. Report their unfiltered opinions."
    example_comments = [
        "Finally! A system to track rebel scum more efficiently. This will be a glorious day for the Empire!",
        "This is clearly a data-mining operation. They're going to use it to crush the Rebellion. We need to sabotage it!",
        "The Force guides us to see through their deception. This 'civic tech' will only serve to tighten their grip on the galaxy.",
        "As long as it doesn't mess with my profit margins, I'm indifferent. But I suspect it will.",
        "I just want a reliable power converter. Is that too much to ask? This 'civic tech' sounds like more bureaucracy.",
        "Anything that improves the speed of selling junk is good in my book. Maybe I can finally get a decent price for this thermal detonator...",
        "Fascinating! I am programmed to be compliant. I shall analyze this initiative and report my findings to the Emperor.",
        "I'm already dreading the help desk calls. 'My Death Star won't fire!' 'The Force isn't working!'",
        "This is a welcome step towards greater efficiency and transparency... cough... as long as it doesn't affect my personal interests.",
        "This is just a fancy way to track our X-wings. We'll find a way to disable it, just like we did with the Death Star.",
        "As long as it helps me track down my targets, I'm in. The more data, the better.",
        "Another reason to drown my sorrows in a Jawa Juice. This whole thing stinks of the Empire's incompetence.",
        "The Emperor's vision is one of unparalleled order and prosperity! This initiative will usher in a new era of galactic harmony!",
        "Will it have cool spaceships in it? Can I play with it?",
        "Beware the allure of technology. It can be a tool for both good and evil. Trust in the Force, young Padawans.",
        "This initiative is a waste of resources. We should be focusing on military expansion, not 'civic engagement.'",
        "I'm interested... Will it help me collect debts more efficiently?",
        "I'm just trying to survive. This sounds like more trouble than it's worth.",
        "If it improves the entertainment options on Coruscant, I'm all for it.",
        "Another set of orders. Understood, sir!",
        "Excellent... with this, I will have even greater control over the galaxy... cackles maniacally"
    ]
    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)
        # Check if the example topic already exists
        result = local_con.execute("SELECT * FROM topics WHERE id = ?", [example_topic_id]).fetchone()
        if result:
            print(f"INFO: Topic '{example_topic_id}' already exists. Skipping dummy data insertion.")
            return # Skip adding dummy data if topic exists
    except Exception as e:
        # Log error but continue, assuming topic doesn't exist if check fails
        print(f"WARNING: Error checking for existing topic '{example_topic_id}': {e}")
        # Don't return, proceed with adding data in case of check error
    finally:
        if local_con:
            local_con.close()
    add_example_topic(example_topic_id, example_topic_title, example_topic_description, example_comments)


def get_ttl_hash(seconds=360):
    """Return the same value withing `seconds` time period"""
    return round(time.time() / seconds)

# Helper function to get the R matrix from user voting data
# This matrix represents user-comment interactions (votes)
# Users are rows, comments are columns.
# Values: 1 for 'agree', 0 for 'neutral', -1 for 'disagree', NaN for unvoted.
# Requires pandas and numpy.
def get_r_matrix_from_votes():
    local_con = None
    try:
        # Use read_only=False to maintain consistent configuration across all connections
        local_con = duckdb.connect(database=DB_PATH, read_only=False)

        # Fetch all vote data
        # fetchdf requires pandas
        votes_df = local_con.execute("""
            SELECT user_id, comment_id, vote_type
            FROM votes
        """).fetchdf()

        if votes_df.empty:
            # Return empty matrix and mappings if no votes exist
            # pd.DataFrame requires pandas
            return pd.DataFrame(), {}, {}

        # Map vote types to numerical values
        vote_mapping = {'agree': 1, 'neutral': 0, 'disagree': -1}
        votes_df['vote_value'] = votes_df['vote_type'].map(vote_mapping)

        # Create the R matrix using pivot_table
        # This automatically handles missing user-comment pairs by filling with NaN
        # pivot_table requires pandas
        r_matrix = votes_df.pivot_table(
            index='user_id',
            columns='comment_id',
            values='vote_value'
        )

        # Create mappings from user/comment IDs to matrix indices (optional but useful)
        user_id_to_index = {user_id: i for i, user_id in enumerate(r_matrix.index)}
        comment_id_to_index = {comment_id: i for i, comment_id in enumerate(r_matrix.columns)}

        return r_matrix, user_id_to_index, comment_id_to_index

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error generating R matrix: {e}")
        # Return empty results in case of error
        # pd.DataFrame requires pandas
        return pd.DataFrame(), {}, {}
    finally:
        if local_con:
            local_con.close()


# Function to get clusters using HDBSCAN with the custom Hamming distance
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
def get_clusters_from_r_matrix(r_matrix):
    """
    Performs HDBSCAN clustering on the R matrix using a custom Hamming-like distance
    that handles NaN values.

    Args:
        r_matrix (pd.DataFrame): The user-comment vote matrix from get_r_matrix_from_votes.
                                 Index should be user_id, columns comment_id.

    Returns:
        np.ndarray: An array of cluster labels for each user in the r_matrix index.
                    -1 indicates noise. Returns empty array if clustering fails or
                    r_matrix is empty.
    """
    # Check if r_matrix is empty
    if r_matrix.empty:
        print("R matrix is empty, cannot perform clustering.")
        return np.array([])

    try:
        # Instantiate HDBSCAN with the custom metric
        # Using default parameters for min_cluster_size and min_samples
        # These might need tuning based on data characteristics and desired cluster granularity
        # allow_single_cluster=True prevents an error if all points form one cluster
        clusterer = hdbscan.HDBSCAN(
            metric='hamming',
            allow_single_cluster=True,
            min_cluster_size=max(int(np.sqrt(len(r_matrix))), 3),
            min_samples=None)

        # Fit the model directly to the DataFrame values
        # HDBSCAN fit expects a numpy array or similar structure
        clusterer.fit(r_matrix.values)

        # Return the cluster labels
        return clusterer.labels_

    except Exception as e:
        # In a Streamlit app context, st.error would be better, but not available here.
        # Print to console/logs.
        print(f"Error during HDBSCAN clustering: {e}")
        return np.array([]) # Return empty array on error


def get_cluster_labels(user_id):
    r_matrix, user_id_to_index, _ = get_r_matrix_from_votes()
    # Check if the user_id exists in the matrix index
    if user_id not in user_id_to_index:
        print(f"Warning: User ID '{user_id}' not found in the R matrix. Cannot perform user-specific filtering for clustering.")
        # Return empty results as filtering based on this user is not possible.
        # The downstream function get_user_cluster_label handles the user not being in the index.
        # Returning empty arrays/dict matches the structure of the expected return value.
        return np.array([]), {} # Return empty labels and empty index map

    # Get the row for the specific user
    user_row = r_matrix.loc[user_id]

    # Find columns where the user has voted (values are not NaN)
    voted_comment_ids = user_row.dropna().index

    # Ensure we handle the case where the user hasn't voted on anything
    if voted_comment_ids.empty:
        print(f"Warning: User ID '{user_id}' has not voted on any comments. Cannot perform clustering based on votes.")
        # If no votes, no columns to cluster on. Return empty results.
        return np.array([]), {}

    # Filter the r_matrix to include only these columns
    # This is the matrix that will be used for clustering in the next step.
    # The subsequent line calling get_clusters_from_r_matrix should use this variable.
    r_matrix = r_matrix[voted_comment_ids]
    cluster_labels = get_clusters_from_r_matrix(r_matrix)
    if len(cluster_labels) == 0:
        cluster_labels = [0] * len(user_id_to_index)
    return cluster_labels, user_id_to_index


# Function to get the cluster label for a specific user
@lru_cache()
def get_user_cluster_label(user_id, ttl_hash=None):
    """
    Gets the HDBSCAN cluster label for a specific user and a list of users
    sharing the same cluster.

    Args:
        user_id (str): The ID of the user.

    Returns:
        tuple: A tuple containing:
            - int or None: The cluster label (an integer, -1 for noise) if the user
                           is found in the clustering result, otherwise None.
            - list[str]: A list of user IDs (including the input user_id if found)
                         that belong to the same cluster. Returns an empty list
                         if the user is not found or has no cluster label.
    """
    # get_cluster_labels is already cached, so calling it repeatedly is fine
    cluster_labels, user_id_to_index = get_cluster_labels(user_id)

    # Create a reverse mapping from index to user_id for easier lookup
    index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}

    target_cluster_label = None
    same_cluster_users = []

    # Check if the user_id exists in the mapping
    if user_id in user_id_to_index:
        user_index = user_id_to_index[user_id]
        # Ensure the index is within the bounds of the cluster_labels array
        if 0 <= user_index < len(cluster_labels):
            target_cluster_label = int(cluster_labels[user_index]) # Get the target label

            # Find all users with the same cluster label
            for index, current_user_id in index_to_user_id.items():
                # Ensure the index is valid for cluster_labels
                if 0 <= index < len(cluster_labels):
                    current_user_label = int(cluster_labels[index])
                    if current_user_label == target_cluster_label:
                        same_cluster_users.append(current_user_id)
                else:
                     # This case should ideally not happen if index_to_user_id is consistent
                     print(f"Warning: Index {index} from index_to_user_id out of bounds for cluster labels array length {len(cluster_labels)}")


        else:
            # This case should ideally not happen if user_id_to_index is consistent
            print(f"Warning: User index {user_index} out of bounds for cluster labels array length {len(cluster_labels)}")
            # Return None and empty list as user couldn't be processed
            return None, []
    else:
        # User not found in the R matrix used for clustering (e.g., new user with no votes)
        # print(f"User ID {user_id} not found in clustering data.") # Optional: for debugging
        # Return None and empty list as user is not part of the current clustering result
        return None, []

    # Return the target user's label and the list of users in that cluster
    return target_cluster_label, same_cluster_users


# Helper function to get top k most polarized comments for a list of users
def get_top_k_consensus_comments_for_users(user_ids, topic_id, k=5):
    """
    Retrieves the top k comments with the highest voting consensus (lowest variance)
    among a given list of users *for a specific topic*.

    Consensus is measured by the population variance (VAR_POP) of numerical
    vote scores (-1 for 'disagree', 0 for 'neutral', 1 for 'agree').
    Lower variance indicates higher consensus.

    Args:
        user_ids (list[str]): A list of user IDs.
        topic_id (str): The ID of the topic to filter comments by.
        k (int): The number of top comments to retrieve.

    Returns:
        list[tuple]: A list of tuples, where each tuple contains
                     (comment_id, comment_content, vote_variance),
                     ordered by vote_variance ascending (lowest variance first).
                     Returns an empty list if no votes are found for these users
                     on this topic, or on error, or if the group has fewer than 2 users.
    """
    if not user_ids or len(user_ids) < 2:
        # Need at least 2 users from the group to calculate meaningful variance
        # print("Warning: get_top_k_consensus_comments_for_users called with fewer than 2 user_ids.") # Optional debug
        return [] # Cannot query without user IDs or with only one user

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # Use parameterized query for the list of user IDs and topic ID
        # DuckDB's Python API handles lists for IN clauses
        query = """
            SELECT
                v.comment_id,
                c.content,
                VAR_POP(CASE
                    WHEN v.vote_type = 'agree' THEN 1.0
                    WHEN v.vote_type = 'neutral' THEN 0.0
                    WHEN v.vote_type = 'disagree' THEN -1.0
                    ELSE NULL -- Should not happen with current data
                END) as vote_variance,
                COUNT(v.user_id) as num_votes_in_group -- Include count for potential tie-breaking
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE v.user_id IN (?) AND c.topic_id = ? -- Filter by user IDs and topic ID
            GROUP BY v.comment_id, c.content
            HAVING COUNT(v.user_id) >= 2 -- Ensure at least 2 users from the list voted on this comment
            ORDER BY vote_variance ASC, num_votes_in_group DESC -- Order by lowest variance, then by number of votes (more votes = stronger consensus)
            LIMIT ?
        """
        # Pass the list of user_ids, topic_id, and k as parameters
        # DuckDB requires list parameters to be wrapped in a list/tuple for the execute method
        result = local_con.execute(query, [user_ids, topic_id, k]).fetchall()

        # The result includes comment_id, content, variance, and count.
        # We only need comment_id, content, and variance for the return value as per docstring.
        # The count was used for ordering.
        formatted_result = [(row[0], row[1], row[2]) for row in result]

        return formatted_result

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error getting top k consensus comments for users {user_ids} in topic {topic_id}: {e}")
        return [] # Return empty list on error
    finally:
        if local_con:
            local_con.close()


def estimate_group_voting_diversity(user_ids, topic_id):
    """
    Estimates the diversity of voting within a group of users for a specific topic.
    Diversity is measured by the average variance of numerical vote scores (-1, 0, 1)
    across comments that at least two users in the group have voted on.

    Args:
        user_ids (list[str]): A list of user IDs belonging to the group.
        topic_id (str): The ID of the topic.

    Returns:
        float: A diversity score between 0.0 and 1.0. 0.0 indicates no diversity
               (all users voted the same way on all shared comments), 1.0 indicates
               maximum possible diversity (e.g., half agree, half disagree on shared comments).
               Returns 0.0 if the group has less than 2 users or if no comments
               were voted on by at least two users in the group.
    """
    # Convert list to tuple for caching purposes (tuples are hashable)
    user_ids_tuple = tuple(user_ids)

    if not user_ids_tuple or len(user_ids_tuple) < 2:
        return 0.0

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # Get all votes for the given topic by the specified users
        # Join with comments to filter by topic_id
        # Construct the IN clause dynamically to avoid the conversion error
        placeholders = ', '.join(['?'] * len(user_ids_tuple))
        query = f"""
            SELECT
                v.comment_id,
                v.user_id,
                v.vote_type
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN ({placeholders})
        """
        # Pass topic_id and then all user_ids as separate parameters
        params = [topic_id] + list(user_ids_tuple) # Combine topic_id and user_ids
        results = local_con.execute(query, params).fetchall()

        if not results:
            return 0.0 # No votes found for this group on this topic

        # Map vote types to numerical scores
        vote_map = {'agree': 1.0, 'neutral': 0.0, 'disagree': -1.0}

        # Group votes by comment ID
        votes_by_comment = {}
        for comment_id, user_id, vote_type in results:
            if comment_id not in votes_by_comment:
                votes_by_comment[comment_id] = []
            # Append the numerical vote score
            votes_by_comment[comment_id].append(vote_map.get(vote_type, 0.0)) # Default to 0.0 for unknown types

        # Calculate variance for comments voted on by at least two users in the group
        variances = []
        for comment_id, comment_votes in votes_by_comment.items():
            # Ensure the comment was voted on by at least two users from the input list
            if len(comment_votes) >= 2:
                # Use numpy to calculate variance
                variances.append(np.var(comment_votes))

        if not variances:
            return 0.0 # No comments voted on by at least two users in the group

        # The maximum possible variance for values in [-1, 0, 1] is 1.0
        # (e.g., half votes are 1, half are -1).
        # The average variance is already in the range [0, 1].
        average_variance = np.mean(variances)

        return average_variance

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error estimating group voting diversity for topic {topic_id} and users {user_ids_tuple}: {e}")
        return 0.0 # Return 0.0 on error
    finally:
        if local_con:
            local_con.close()

# Helper function to name a group of users based on their participation and voting diversity
def name_user_group(user_ids, topic_id):
    """
    Generates a descriptive name and description for a group of users within a
    specific topic based on their participation level and voting diversity,
    themed around a seeker's journey through different settlements.

    Args:
        user_ids (list[str]): A list of user IDs belonging to the group.
        topic_id (str): The ID of the topic.

    Returns:
        tuple[str, str]: A tuple containing the name and description for the group.
                         Returns ("The Silent Threshold", "You sense a presence, but no clear voices emerge from this place.")
                         or ("The Silent Valley", "A valley where the inhabitants reside, but their voices are silent on this matter.")
                         or ("The Untrodden Path", "This path has not yet been explored by any travelers.")
                         or ("The Shrouded Keep", "A place hidden by mystery and uncertainty.")
                         in edge cases or on error.
    """
    # Handle empty user list - implies no specific group is being considered, but not necessarily an empty world
    if not user_ids:
        return "The Silent Threshold", "You sense a presence, but no clear voices emerge from this place."

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # 1. Get total unique users who voted in the topic
        # Specify v.user_id to avoid ambiguity
        total_voters_result = local_con.execute("""
            SELECT COUNT(DISTINCT v.user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ?
        """, [topic_id]).fetchone()
        total_voters_in_topic = total_voters_result[0] if total_voters_result else 0

        # 2. Get unique users from the input list who voted in the topic
        # Filter user_ids to only those present in the votes table for this topic
        # Construct the IN clause dynamically to avoid casting issues
        # This part correctly uses the list by expanding placeholders
        placeholders = ', '.join(['?'] * len(user_ids))
        group_voters_query = f"""
            SELECT COUNT(DISTINCT v.user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN ({placeholders})
        """
        # Pass topic_id and then all user_ids as separate parameters
        group_voters_result = local_con.execute(group_voters_query, [topic_id] + user_ids).fetchone()
        group_voters_count = group_voters_result[0] if group_voters_result else 0

        # Handle case where no one in the group has voted on this topic
        # The group members exist, but are silent on this specific matter
        if group_voters_count == 0:
             return "The Silent Valley", "A valley where the inhabitants reside, but their voices are silent on this matter."

        # Handle case where topic has no voters at all (the path hasn't been explored)
        if total_voters_in_topic == 0:
             # This case is unlikely if group_voters_count > 0, but for safety
             return "The Untrodden Path", "This path has not yet been explored by any travelers."


        # 3. Calculate significance (proportion of group voters among all topic voters)
        significance_proportion = group_voters_count / total_voters_in_topic

        # 4. Get diversity score for the group
        diversity_score = estimate_group_voting_diversity(user_ids, topic_id)

        # 5. Determine name and description based on significance and diversity
        # Define thresholds (can be tuned)
        SIG_LOW_THRESHOLD = 0.1
        SIG_MED_THRESHOLD = 0.5 # High if > MED, Med if > LOW and <= MED, Low if <= LOW
        DIV_LOW_THRESHOLD = 0.2
        DIV_MED_THRESHOLD = 0.5 # High if > MED, Med if > LOW and <= MED, Low if <= LOW

        significance_level = "low"
        if significance_proportion > SIG_MED_THRESHOLD:
            significance_level = "high"
        elif significance_proportion > SIG_LOW_THRESHOLD:
            significance_level = "medium"

        diversity_level = "low"
        if diversity_score > DIV_MED_THRESHOLD:
            diversity_level = "high"
        elif diversity_score > DIV_LOW_THRESHOLD:
            diversity_level = "medium"

        # Assign names and descriptions based on levels (Themed for seeker quest)
        if significance_level == "high":
            if diversity_level == "low":
                return "The Village of Unity", "A large settlement where the inhabitants share a common, strong belief."
            elif diversity_level == "medium":
                return "The Town of Common Ground", "A bustling town where most agree, though minor differences are acknowledged."
            else: # high diversity
                return "The City of Many Voices", "A major city where travelers from all paths meet and share a wide array of perspectives."
        elif significance_level == "medium":
            if diversity_level == "low":
                return "The Hamlet of Quiet Accord", "A peaceful hamlet of moderate size where opinions align with little dissent."
            elif diversity_level == "medium":
                return "The Crossroads Inn", "An inn of moderate size where various travelers pause, sharing somewhat varied views."
            else: # high diversity
                return "The Debating Circle", "A gathering place of moderate size known for its spirited and diverse discussions."
        else: # low significance
            if diversity_level == "low":
                return "The Like-Minded Few", "A small, isolated group whose thoughts resonate closely."
            elif diversity_level == "medium":
                return "The Scattered Camps", "A few scattered camps where different, quiet thoughts reside."
            else: # high diversity
                return "The Whispering Caves", "A small, hidden network of caves where many different ideas are shared in hushed tones."

    except Exception as e:
        st.error(f"Error naming user group for topic {topic_id} and users {user_ids}: {e}")
        return "The Shrouded Keep", "A place hidden by mystery and uncertainty." # Default name and description on error
    finally:
        if local_con:
            local_con.close()


# Helper function to get a random unvoted comment
def get_random_unvoted_comment(user_id, topic_id):
    new_area_comments = st.session_state.get("_new_area_comments", [])
    if len(new_area_comments) != 0:
        value = new_area_comments.pop()
        st.session_state._new_area_comments = new_area_comments
        return value[0], value[1]
    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=False)

        # First, check if there are any comments at all in the topic
        comment_count = local_con.execute("""
            SELECT COUNT(*) FROM comments WHERE topic_id = ?
        """, [topic_id]).fetchone()[0]

        if comment_count == 0:
            return None, "Share your insight!"

        # Attempt to get a random comment that the user has NOT voted on
        result = local_con.execute("""
            SELECT c.id, c.content
            FROM comments c
            WHERE c.topic_id = ?
            AND NOT EXISTS (
                SELECT * FROM votes v
                WHERE v.comment_id = c.id AND v.user_id = ?
            )
            ORDER BY RANDOM()
            LIMIT 1
        """, [topic_id, user_id]).fetchone()
        if result:
            # Check for cluster change and set message flag
            current_label, current_users = get_user_cluster_label(user_id, get_ttl_hash(10))
            current_users_set = set(current_users)

            previous_label = st.session_state.get('_previous_cluster_label')
            previous_users_set = st.session_state.get('_previous_cluster_users_set', set())

            # Check if cluster label has changed AND the set of users in the new cluster is different
            # This indicates the user has moved to a different group of commenters
            if current_label is not None and previous_label is not None and current_label != previous_label:
                # Calculate overlap (Jaccard Index)
                intersection_size = len(current_users_set.intersection(previous_users_set))
                union_size = len(current_users_set.union(previous_users_set))

                # Check if overlap (Jaccard) is over 70%
                # Handle case where union_size is 0 (both sets empty)
                if union_size > 0 and (intersection_size / union_size) > 0.7:
                    # Set a flag in session state to display the message later in the main rendering logic
                    st.session_state._show_new_area_message = True
                    # Fetch comments from the NEW area (current_users_set)
                    # Note: get_top_k_consensus_comments_for_users expects a list, not a set
                    new_area_comments = get_top_k_consensus_comments_for_users(list(current_users_set), topic_id, k=5)
                    st.session_state._new_area_comments = new_area_comments
                    # print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
                    # print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
            st.session_state._previous_cluster_label = current_label
            st.session_state._previous_cluster_users_set = current_users_set

            # Found an unvoted comment
            return result[0], result[1]
        else:
            # No unvoted comments found for this user in this topic
            return None, "No new thoughts for now..."

    except Exception as e:
        st.error(f"Error getting random unvoted comment: {e}")
        return None, f"Error loading comments: {str(e)}"
    finally:
        if local_con:
            local_con.close()

# Helper function to find or create a user
def find_or_create_user(username):
    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=False)
        user_result = local_con.execute("SELECT id FROM users WHERE username = ?", [username]).fetchone()
        if user_result:
            return user_result[0]
        else:
            user_id = str(uuid.uuid4())
            local_con.execute("INSERT INTO users (id, username) VALUES (?, ?)", [user_id, username])
            return user_id
    except Exception as e:
        st.error(f"Error finding or creating user: {e}")
        return None
    finally:
        if local_con:
            local_con.close()

# Helper function to update user progress
def update_user_progress(user_id, topic_id, comment_id):
    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=False)
        progress_id = str(uuid.uuid4())
        local_con.execute("""
            INSERT INTO user_progress (id, user_id, topic_id, last_comment_id_viewed) VALUES (?, ?, ?, ?)
            ON CONFLICT (user_id, topic_id) DO UPDATE SET
                last_comment_id_viewed = EXCLUDED.last_comment_id_viewed
        """, [progress_id, user_id, topic_id, comment_id])
    except Exception as e:
        st.error(f"Error updating user progress: {e}")
    finally:
        if local_con:
            local_con.close()

# --- Page Functions ---

def home_page():
    st.title("Welcome to SteamPolis")
    st.markdown("Choose an option:")

    if st.button("Create New Topic (Quest)"):
        st.session_state.page = 'create_topic'
        st.rerun()

    st.markdown("---")
    st.markdown("Or join an existing topic (quest):")
    topic_input = st.text_input("Enter Topic ID or URL")

    if st.button("Join Topic"):
        topic_id = topic_input.strip()
        if topic_id.startswith('http'): # Handle full URL
             parsed_url = urllib.parse.urlparse(topic_id)
             query_params = urllib.parse.parse_qs(parsed_url.query)
             topic_id = query_params.get('topic', [None])[0]

        if topic_id:
            st.session_state.page = 'view_topic'
            st.session_state.current_topic_id = topic_id
            # Attempt to load email from session state (mimics browser state)
            # If email exists, handle email submission logic immediately on view page load
            st.rerun()
        else:
            st.warning("Please enter a valid Topic ID or URL.")


def create_topic_page():
    st.title("Create a New Topic")

    new_topic_name = st.text_input("Topic Name (Imagine you are the king, how would you share your concern)")
    new_topic_description = st.text_area('Description (Begin with "I want to figure out...", imagine you are the king, what would you want to know)', height=150)
    new_topic_seed_comments = st.text_area("Initial Comments (separate by new line, imagine there are civilians what will they answer)", height=200)
    creator_email = st.text_input("Enter your Email (required for creation)")

    if st.button("Create Topic"):
        if not creator_email:
            st.error("Email is required to create a topic.")
            return

        topic_id = str(uuid.uuid4())[:8]
        user_id = find_or_create_user(creator_email)

        if user_id:
            local_con = None
            try:
                local_con = duckdb.connect(database=DB_PATH, read_only=False)
                local_con.execute("INSERT INTO topics (id, name, description) VALUES (?, ?, ?)", [topic_id, new_topic_name, new_topic_description])

                seed_comments = [c.strip() for c in new_topic_seed_comments.split('\n') if c.strip()]
                for comment in seed_comments:
                    comment_id = str(uuid.uuid4())
                    local_con.execute("INSERT INTO comments (id, topic_id, user_id, content) VALUES (?, ?, ?, ?)",
                                      [comment_id, topic_id, 'system', comment])

                # Get the first comment to display after creation
                comment_to_display_id, comment_to_display_content = get_random_unvoted_comment(user_id, topic_id)

                # Set initial progress for creator
                update_user_progress(user_id, topic_id, comment_to_display_id)

                st.session_state.page = 'view_topic'
                st.session_state.current_topic_id = topic_id
                st.session_state.user_email = creator_email # Store email in session state
                st.session_state.current_comment_id = comment_to_display_id
                st.session_state.current_comment_content = comment_to_display_content
                st.session_state.comment_history = ""

                st.success(f"Topic '{new_topic_name}' created!")
                st.rerun()

            except Exception as e:
                st.error(f"Error creating topic: {e}")
            finally:
                if local_con:
                    local_con.close()
        else:
            st.error("Could not find or create user.")


    if st.button("Back to Home"):
        st.session_state.page = 'home'
        st.rerun()


def view_topic_page():
    topic_id = st.session_state.get('current_topic_id')
    user_email = st.session_state.get('user_email', '')
    current_comment_id = st.session_state.get('current_comment_id')
    current_comment_content = st.session_state.get('current_comment_content', "Loading comments...")
    comment_history = st.session_state.get('comment_history', "")

    if not topic_id:
        st.warning("No topic selected. Returning to home.")
        st.session_state.page = 'home'
        st.rerun()
        return

    local_con = None
    topic_name = "Loading..."
    topic_description = "Loading..."

    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)
        topic_result = local_con.execute("SELECT name, description FROM topics WHERE id = ?", [topic_id]).fetchone()
        if topic_result:
            topic_name, topic_description = topic_result
        else:
            st.error(f"Topic ID '{topic_id}' not found.")
            st.session_state.page = 'home'
            st.rerun()
            return
    except Exception as e:
        st.error(f"Error loading topic details: {e}")
        if local_con:
             local_con.close()
        st.session_state.page = 'home'
        st.rerun()
        return
    finally:
        if local_con:
            local_con.close()


    # Include functional information
    st.markdown(f"**Shareable Quest Scroll ID:** `{topic_id}`")
    # Construct shareable link using current app URL
    app_url = st.query_params.get('base', [DEFAULT_BASE_URL])[0] # Get base URL if available
    shareable_link = f"{app_url}?topic={topic_id}" if app_url else f"?topic={topic_id}"
    st.markdown(f"**Shareable Scroll Link:** `{shareable_link}`")

    st.title("Seeker Quest")

    # Check if user email is available in session state.
    # user_email is already retrieved from st.session_state at the start of view_topic_page.
    if user_email:
        # Get the user ID. find_or_create_user handles the DB connection internally.
        user_id = find_or_create_user(user_email)
        if user_id:
            # Check if user has any progress recorded for this specific topic.
            # This indicates they have viewed comments or interacted before.
            local_con = None
            progress_exists = False
            try:
                local_con = duckdb.connect(database=DB_PATH, read_only=True)

                # Check if the user has voted on any comment in this topic
                # This requires joining votes with comments to filter by topic_id
                voted_result = local_con.execute("""
                    SELECT 1
                    FROM votes v
                    JOIN comments c ON v.comment_id = c.id
                    WHERE v.user_id = ? AND c.topic_id = ?
                    LIMIT 1
                """, [user_id, topic_id]).fetchone()

                # Check if the user has submitted any comment in this topic
                commented_result = local_con.execute("""
                    SELECT 1
                    FROM comments
                    WHERE user_id = ? AND topic_id = ?
                    LIMIT 1
                """, [user_id, topic_id]).fetchone()

                # Progress exists if the user has either voted or commented in this topic
                progress_exists = voted_result is not None or commented_result is not None
            except Exception as e:
                # Log error but don't stop the app. Assume no progress on error.
                st.error(f"Error checking user progress for greeting: {e}")
                # progress_exists remains False
            finally:
                if local_con:
                    local_con.close()

            # Display the appropriate greeting based on progress
            if progress_exists:
                # Acknowledge return and remind of quest
                st.markdown("Welcome back, Seeker. Your journey through the whispers of Aethelgard continues.")
                st.markdown(f"You pause to recall the heart of the Emperor's concern regarding **{topic_name}**: `{topic_description}`.")

                # Introduce the next comment
                st.markdown("As you press onward, you encounter another soul willing to share their thoughts on this vital matter.")
            else:
                # Introduce the setting and the Emperor's concern
                st.markdown("Welcome, Seeker, to the ancient Kingdom of Aethelgard, a realm of digital whispers and forgotten wisdom.")
                st.markdown("For centuries, Aethelgard has stood, preserving the echoes of an age long past. But now, a matter of great weight troubles the Emperor's thoughts.")
                st.markdown(f"The Emperor seeks clarity on a crucial topic: **`{topic_name}`**.")

                # Explain the quest and the user's role
                st.markdown("You, among a select few, have been summoned for a vital quest: to traverse the kingdom, gather insights, and illuminate this matter for the throne.")
                st.markdown(f"At a recent royal gathering, the Emperor revealed the heart of their concern, the very essence of your mission: `{topic_description}`")

                # Transition to the task
                st.markdown("Your journey begins now. The path leads to the first village, where the voices of the realm await your ear.")


    # --- Email Prompt ---
    if not user_email:
        st.subheader("Enter your Email to view comments and progress")
        view_user_email_input = st.text_input("Your Email", key="view_email_input")
        if st.button("Submit Email", key="submit_view_email"):
            if view_user_email_input:
                st.session_state.user_email = view_user_email_input
                user_id = find_or_create_user(view_user_email_input)
                if user_id:
                    comment_to_display_id, comment_to_display_content = get_random_unvoted_comment(user_id, topic_id)
                    st.session_state.current_comment_id = comment_to_display_id
                    st.session_state.current_comment_content = comment_to_display_content
                    update_user_progress(user_id, topic_id, comment_to_display_id)
                    st.session_state.comment_history = "" # Reset history on new email submission
                    st.rerun()
                else:
                    st.error("Could not find or create user with that email.")
            else:
                st.warning("Please enter your email.")
        return # Stop rendering the rest until email is submitted

    # --- Comment Display and Voting ---
    # Define introductory phrases for encountering a new perspective
    intro_phrases = [
        "A new whisper reaches your ear",
        "You ponder a fresh perspective",
        "Another voice shares their view",
        "A thought emerges from the crowd",
        "The wind carries a new idea",
        "Someone offers an insight",
        "You overhear a comment",
        "A different angle appears",
        "The village elder shares",
        "A traveler murmurs",
    ]
    # Randomly select a phrase
    random_phrase = random.choice(intro_phrases)
    st.markdown(comment_history)

    if current_comment_id: # Only show voting if there's a comment to vote on
        # Display comment history and the current comment with the random intro
        if st.session_state.get('_show_new_area_message', True):
            st.session_state._show_new_area_message = False
            _, user_ids = get_user_cluster_label(user_id, get_ttl_hash(10))
            new_area_name, desc = name_user_group(user_ids, topic_id)
            for statm in [
                f"๐Ÿ“š You've collected **{len(comment_history.splitlines())}** insights this time.",
                f"๐Ÿ—บ๏ธ And yet, your journey leads you to a new place: **{new_area_name}**! {desc}"]:
                st.markdown(statm)
                st.session_state.comment_history += f"\n\n{statm}"
        st.markdown(f"[Collected new insight, {random_phrase}]:\n* {current_comment_content}")

        # Handle vote logic
        def handle_vote(vote_type, comment_id, topic_id, user_id):
            # Add JavaScript to scroll to the bottom anchor after the page reloads
            # This script will be included in the next render cycle triggered by st.rerun()
            # Ensure an element with id="bottom" exists in the rendered page,
            # typically placed after the content you want to scroll to (e.g., comment history).
            local_con = None
            try:
                local_con = duckdb.connect(database=DB_PATH, read_only=False)
                # Use INSERT OR REPLACE INTO or ON CONFLICT DO UPDATE to handle repeat votes
                # The UNIQUE constraint on (user_id, comment_id) in the votes table
                # allows us to update the existing vote if one already exists for this user/comment pair.
                # We generate a new UUID for the 'id' column, but it will only be used
                # if this is a new insert. If it's an update, the existing 'id' is kept.
                vote_id = str(uuid.uuid4()) # Generate a new UUID for the potential insert
                local_con.execute("""
                    INSERT INTO votes (id, user_id, comment_id, vote_type)
                    VALUES (?, ?, ?, ?)
                    ON CONFLICT (user_id, comment_id)
                    DO UPDATE SET
                        vote_type = excluded.vote_type, -- Update vote_type with the new value
                        created_at = current_localtimestamp(); -- Update timestamp to reflect the latest vote
                """, [vote_id, user_id, comment_id, vote_type])

                # Append voted comment to history
                # Note: This appends the comment regardless of whether it was a new vote or an update.
                # The history is a simple log, not a reflection of vote changes.
                vote_text = "๐Ÿ‘" if vote_type == "agree" else "๐Ÿ‘Ž" if vote_type == "disagree" else "๐Ÿ˜"
                comment_history = st.session_state.comment_history.split("\n\n")
                if len(comment_history) > 10:
                    comment_history = ["..."] + comment_history[-10:]
                st.session_state.comment_history = "\n\n".join(comment_history)
                st.session_state.comment_history += f"\n\n{vote_text} {current_comment_content}"

                # Check vote count and trigger special event
                # Initialize vote_count if it doesn't exist
                if 'vote_count' not in st.session_state:
                    st.session_state.vote_count = 0

                # Increment vote count only if it was a new vote or a change?
                # The current logic increments on every button click. Let's keep that for now
                # as it drives the special event trigger based on interaction frequency.
                st.session_state.vote_count += 1

                # Check if it's time for a potential special event (every 5 votes/interactions)
                if st.session_state.vote_count % 5 == 0:
                    st.session_state.vote_count = 0 # Reset count after triggering
                    # 30% chance to trigger the special sharing event
                    if random.random() < 0.3:
                        prompts = [
                            "An elder approaches you, seeking your perspective on the Emperor's concern. What wisdom do you share?",
                            "A letter arrives from the Emperor's office, requesting your personal insight on the matter. What counsel do you offer?",
                            "As you walk through the streets, people gather, eager to hear your thoughts on the Emperor's dilemma. What advice do you give?"
                        ]
                        # Pass the current topic_id to share_wisdom if needed, though it's not currently used there.
                        st.markdown(random.choice(prompts))
                        new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
                        st.session_state.handling_vote = True # lock
                        if st.button("Share Wisdom"):
                            if new_comment_text and len(new_comment_text.strip()):
                                user_email = st.session_state.get('user_email', '')
                                user_id = find_or_create_user(user_email) # Ensure user exists
                                if user_id:
                                    local_con = None
                                    try:
                                        local_con = duckdb.connect(database=DB_PATH, read_only=False)
                                        comment_id = str(uuid.uuid4())
                                        local_con.execute("""
                                            INSERT INTO comments (id, topic_id, user_id, content)
                                            VALUES (?, ?, ?, ?)
                                        """, [comment_id, topic_id, user_id, new_comment_text])

                                        # Append new comment to history
                                        st.session_state.comment_history += f"\n\n๐Ÿ’ฌ {new_comment_text}"

                                        st.session_state.tmp_new_comment_input = "" # Clear input box
                                    except Exception as e:
                                        st.error(f"Error sharing information: {e}")
                                    finally:
                                        if local_con:
                                            local_con.close()
                                else:
                                    st.error("Could not find or create user.")
                            st.session_state.handling_vote = False # lock

                # Get next comment
                # This should always get the next unvoted comment for the user in this topic.
                next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
                st.session_state.current_comment_id = next_comment_id
                st.session_state.current_comment_content = next_comment_content

                # Update progress
                # Update the user's progress to the next comment they should see.
                update_user_progress(user_id, topic_id, next_comment_id)

                st.session_state._voting_in_progress = False
                if st.session_state.get("handling_vote", False) is False:
                    st.rerun() # Rerun to update UI

            except Exception as e:
                st.error(f"Error processing vote: {e}")
            finally:
                if local_con:
                    local_con.close()

        st.session_state._voting_in_progress = st.session_state.get("_voting_in_progress", False)
        col1, col2, col3, col4 = st.columns(4)
        user_id = find_or_create_user(user_email) # Ensure user exists

        col1.markdown("*Personally I...*")
        if col2.button("Agree", disabled=st.session_state.get('_voting_in_progress', False)):
            # Set a flag immediately to disable buttons until next render
            if st.session_state._voting_in_progress == False:
                st.session_state._voting_in_progress = True
                handle_vote("agree", current_comment_id, topic_id, user_id)
        if col3.button("Neutral", disabled=st.session_state.get('_voting_in_progress', False)):
            # Set a flag immediately to disable buttons until next render
            if st.session_state._voting_in_progress == False:
                st.session_state._voting_in_progress = True
                handle_vote("neutral", current_comment_id, topic_id, user_id)
        if col4.button("Disagree", disabled=st.session_state.get('_voting_in_progress', False)):
            # Set a flag immediately to disable buttons until next render
            if st.session_state._voting_in_progress == False:
                st.session_state._voting_in_progress = True
                handle_vote("disagree", current_comment_id, topic_id, user_id)

    else:
        st.info("No more comments to vote on in this topic." if "No more comments" in current_comment_content else current_comment_content)


    st.markdown("")

    # --- Comment Submission ---
    with st.expander("Offer Your Counsel to the Emperor", expanded=False):
        st.markdown("Having heard the thoughts of others, what wisdom do you wish to share regarding the Emperor's concern?")
        new_comment_text = st.text_area(f"Your Insight", key="new_comment_input")
        if st.button("Share Your Wisdom"):
            if new_comment_text:
                user_email = st.session_state.get('user_email', '')
                user_id = find_or_create_user(user_email) # Ensure user exists
                if user_id:
                    local_con = None
                    try:
                        local_con = duckdb.connect(database=DB_PATH, read_only=False)
                        comment_id = str(uuid.uuid4())
                        local_con.execute("""
                            INSERT INTO comments (id, topic_id, user_id, content)
                            VALUES (?, ?, ?, ?)
                        """, [comment_id, topic_id, user_id, new_comment_text])

                        # Append new comment to history
                        st.session_state.comment_history += f"\n\n๐Ÿ’ฌ {new_comment_text}"

                        # Get next comment (could be the one just submitted)
                        next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
                        st.session_state.current_comment_id = next_comment_id
                        st.session_state.current_comment_content = next_comment_content

                        # Update progress
                        update_user_progress(user_id, topic_id, next_comment_id)

                        st.session_state.new_comment_input = "" # Clear input box
                        st.rerun() # Rerun to update UI

                    except Exception as e:
                        st.error(f"Error sharing information: {e}")
                    finally:
                        if local_con:
                            local_con.close()
                else:
                    st.error("Could not find or create user.")
            else:
                st.warning("Please enter your thought.")

    st.markdown("---")


    if st.button("Pack all insights and Return to Capital"):
        st.session_state.page = 'home'
        st.rerun()

    # st.components.v1.html("""
    #     <script>
    #     document.addEventListener('DOMContentLoaded', function() {
    #         // ็›‘ๅฌ DOM ๅ˜ๅŒ–
    #         const observer = new MutationObserver(() => scrollToTarget());
    #         observer.observe(document.body, {
    #             childList: true,
    #             subtree: true,
    #             attributes: true
    #         });
    #     });


    #     // ๆปšๅŠจๅˆฐ็›ฎๆ ‡ๅ…ƒ็ด 
    #     function scrollToTarget() {
    #         const target = document.querySelector("div.stColumn");
    #         if (target) {
    #             target.scrollIntoView({ behavior: "smooth", block: "center" });
    #         }
    #     }
    #     </script>""")

# Initialize session state for navigation and data
if 'page' not in st.session_state:
    st.session_state.page = 'home'
if 'current_topic_id' not in st.session_state:
    st.session_state.current_topic_id = None
if 'user_email' not in st.session_state:
    st.session_state.user_email = '' # Mimics browser state
if 'current_comment_id' not in st.session_state:
    st.session_state.current_comment_id = None
if 'current_comment_content' not in st.session_state:
    st.session_state.current_comment_content = "Loading comments..."
if 'comment_history' not in st.session_state:
    st.session_state.comment_history = ""
if 'processed_url_params' not in st.session_state:
    st.session_state.processed_url_params = False # Add flag initialization

# Initialize the database and add dummy data only once per session
if st.session_state.get("db_initialized", False) is False:
    print("INFO: Initializing database and adding dummy data...") # Optional: Info message
    initialize_database()
    add_dummy_topic()
    st.session_state.db_initialized = True
    print("INFO: Database initialization complete.") # Optional: Info message


# Handle initial load from URL query parameters
# Process only once per session load using the flag
query_params = st.query_params
# Check for 'topic' param and if it hasn't been processed yet
if 'topic' in query_params and not st.session_state.processed_url_params:
    topic_id_from_url = query_params.get('topic') # Use .get for safety
    if topic_id_from_url: # Check if topic_id is actually retrieved
        st.session_state.page = 'view_topic'
        st.session_state.current_topic_id = topic_id_from_url
        st.session_state.processed_url_params = True # Mark as processed
        # The view_topic_page will handle loading user/comment based on session_state.user_email
        st.rerun() # Rerun to apply the page change


# Render the appropriate page based on session state
if st.session_state.page == 'home':
    home_page()
elif st.session_state.page == 'create_topic':
    create_topic_page()
elif st.session_state.page == 'view_topic':
    view_topic_page()