import streamlit as st
import pandas as pd
import io
import xlsxwriter
from scipy.sparse import load_npz
import pickle
from sentence_transformers import SentenceTransformer
from modules.multimatch_result_table import show_multi_table
from modules.singlematch_result_table import show_single_table
from modules.allprojects_result_table import show_all_projects_table
from functions.filter_multi_project_matching import filter_multi
from functions.filter_single_project_matching import filter_single
from functions.filter_all_project_matching import filter_all_projects
from functions.multi_project_matching import calc_multi_matches
from functions.same_country_filter import same_country_filter
from functions.single_project_matching import find_similar
import gc

# Cache DATA
# Load Similarity matrix
@st.cache_data
def load_sim_matrix():
    """
    !!! Similarities when matches between same orgas are allowed
    """
    loaded_matrix = load_npz("src/extended_similarities.npz")
    return loaded_matrix

# Load Non Similar Orga Matrix
@st.cache_data
def load_nonsameorga_sim_matrix():
    """
    !!! Similarities when matches between same orgas are NOT allowed
    """
    loaded_matrix = load_npz("src/extended_similarities_nonsimorga.npz")
    return loaded_matrix

# Load Projects DFs
@st.cache_data
def load_projects():
    def fix_faulty_descriptions(description): # In some BMZ projects there are duplicate descriptions 
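        # e.g. "Improve water supply; Improve water supply" -> "Improve water supply"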
        if description and ';' in description:
            parts = description.split(';')
            if len(parts) == 2 and parts[0].strip() == parts[1].strip():
                return parts[0].strip()
        return description

    orgas_df = pd.read_csv("src/projects/project_orgas.csv")
    region_df = pd.read_csv("src/projects/project_region.csv")
    sector_df = pd.read_csv("src/projects/project_sector.csv")
    status_df = pd.read_csv("src/projects/project_status.csv")
    texts_df = pd.read_csv("src/projects/project_texts.csv")

    projects_df = pd.merge(orgas_df, region_df, on='iati_id', how='inner')
    projects_df = pd.merge(projects_df, sector_df, on='iati_id', how='inner')
    projects_df = pd.merge(projects_df, status_df, on='iati_id', how='inner')
    projects_df = pd.merge(projects_df, texts_df, on='iati_id', how='inner')



    # Add regions (this should have been done in preprocessing; kept here as a quick fix to support the region filter)
    region_lookup_df = pd.read_csv('src/codelists/regions.csv', usecols=['alpha-2', 'region', 'sub-region'])

    projects_df['country_code'] = projects_df['country'].str.replace(';', '').str.strip()
    # Replace empty values in the 'country_code' column with 'Unknown'
    projects_df['country_code'] = projects_df['country_code'].fillna('Unknown')

    region_lookup_df['alpha-2'] = region_lookup_df['alpha-2'].str.strip()
    projects_df = pd.merge(projects_df, region_lookup_df[['alpha-2', 'region', 'sub-region']], left_on='country_code', right_on='alpha-2', how='left')
    
    projects_df.rename(columns={'region': 'continent', 'sub-region': 'region'}, inplace=True)
    projects_df['continent'] = projects_df['continent'].fillna('Unknown')
    projects_df['region'] = projects_df['region'].fillna('Unknown')


    # Fix faulty descriptions for BMZ projects
    bmz_mask = projects_df['orga_abbreviation'].str.lower() == 'bmz'
    projects_df.loc[bmz_mask, 'description_main'] = projects_df.loc[bmz_mask, 'description_main'].apply(fix_faulty_descriptions)

    # Add Project Link column
    projects_df['Project Link'] = projects_df['iati_id'].apply(
        lambda x: f'https://d-portal.org/ctrack.html#view=act&aid={x}'
    )

    # Create necessary columns for consistency
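    # The CRS/SDG columns contain ';'-separated code strings; split them into lists
    # and drop the empty last element produced by a trailing ';'.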
    projects_df['crs_3_code_list'] = projects_df['crs_3_name'].apply(
        lambda x: [""] if pd.isna(x) else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
    )
    projects_df['crs_5_code_list'] = projects_df['crs_5_name'].apply(
        lambda x: [""] if pd.isna(x) else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
    )
    projects_df['sdg_list'] = projects_df['sgd_pred_code'].apply(
        lambda x: [""] if pd.isna(x) else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
    )
    
    # Ensure country_flag is set to None if country_name is missing or "NA"
    projects_df['country_flag'] = projects_df.apply(
        lambda row: None if pd.isna(row['country_name']) or row['country_name'] == "NA" else row['country_flag'],
        axis=1
    )

    iati_search_list = [f'{row.iati_id}' for row in projects_df.itertuples()]
    title_search_list = [f'{row.title_main} ({row.orga_abbreviation.upper()})' for row in projects_df.itertuples()]

    return projects_df, iati_search_list, title_search_list


# Load CRS 3 data
@st.cache_data
def getCRS3():
    # Read in CRS3 CODELISTS
    crs3_df = pd.read_csv('src/codelists/crs3_codes.csv')
    CRS3_CODES = crs3_df['code'].tolist()
    CRS3_NAME = crs3_df['name'].tolist()
    CRS3_MERGED = {f"{name} - {code}": code for name, code in zip(CRS3_NAME, CRS3_CODES)}
    return CRS3_MERGED

# Load CRS 5 data
@st.cache_data
def getCRS5():
    # Read in CRS5 CODELISTS
    crs5_df = pd.read_csv('src/codelists/crs5_codes.csv')
    CRS5_CODES = crs5_df['code'].tolist()
    CRS5_NAME = crs5_df['name'].tolist()
    CRS5_MERGED = {code: [f"{name} - {code}"] for name, code in zip(CRS5_NAME, CRS5_CODES)}
    return CRS5_MERGED

# Load SDG data
@st.cache_data
def getSDG():
    # Read in SDG CODELISTS
    sdg_df = pd.read_csv('src/codelists/sdg_goals.csv')
    SDG_NAMES = sdg_df['name'].tolist()
    return SDG_NAMES

@st.cache_data
def getCountry():
    # Read in countries from codelist
    country_df = pd.read_csv('src/codelists/country_codes_ISO3166-1alpha-2.csv')
    
    # Read in regions from codelist, keeping only the relevant columns
    region_lookup_df = pd.read_csv('src/codelists/regions.csv', usecols=['alpha-2', 'region', 'sub-region'])
    
    # Strip quotes from the 'Alpha-2 code' column in country_df
    country_df['Alpha-2 code'] = country_df['Alpha-2 code'].str.replace('"', '').str.strip()
    
    # Ensure no leading/trailing spaces in the 'alpha-2' column in region_lookup_df
    region_lookup_df['alpha-2'] = region_lookup_df['alpha-2'].str.strip()
    
    # Merge country and region dataframes on 'Alpha-2 code' from country_df and 'alpha-2' from region_lookup_df
    merged_df = pd.merge(country_df, region_lookup_df, how='left', left_on='Alpha-2 code', right_on='alpha-2')
    
    # Handle any missing regions or sub-regions
    merged_df['region'] = merged_df['region'].fillna('Unknown')
    merged_df['sub-region'] = merged_df['sub-region'].fillna('Unknown')
    
    # Extract necessary columns as lists
    COUNTRY_CODES = merged_df['Alpha-2 code'].tolist()
    COUNTRY_NAMES = merged_df['Country'].tolist()
    REGIONS = merged_df['region'].tolist()
    SUB_REGIONS = merged_df['sub-region'].tolist()
    
    # Create the original COUNTRY_OPTION_LIST without regions
    COUNTRY_OPTION_LIST = [f"{COUNTRY_NAMES[i]} ({COUNTRY_CODES[i]})" for i in range(len(COUNTRY_NAMES))]

    # Create a hierarchical filter structure for sub-regions
    sub_region_hierarchy = {}
    sub_region_to_region = {}
    for i in range(len(SUB_REGIONS)):
        sub_region = SUB_REGIONS[i]
        country = COUNTRY_CODES[i]
        region = REGIONS[i]
        if sub_region not in sub_region_hierarchy:
            sub_region_hierarchy[sub_region] = []
        sub_region_hierarchy[sub_region].append(country)
        
        # Map sub-regions to regions
        sub_region_to_region[sub_region] = region

    # Sort the subregions by regions
    sorted_sub_regions = sorted(sub_region_hierarchy.keys(), key=lambda x: sub_region_to_region[x])
    
    return COUNTRY_OPTION_LIST, sorted_sub_regions

# Call the function to load the country and region option lists
COUNTRY_OPTION_LIST, REGION_OPTION_LIST = getCountry()


# Load Sentence Transformer Model
@st.cache_resource
def load_model():
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model

# Load Embeddings
@st.cache_data
def load_embeddings_and_index():
    # Load embeddings
    with open("src/embeddings.pkl", "rb") as fIn:
        stored_data = pickle.load(fIn)
    embeddings = stored_data["embeddings"]
    return embeddings
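
# A minimal sketch (not called anywhere in the app) of how "src/embeddings.pkl"
# could be regenerated so that it matches the {"embeddings": ...} layout read by
# load_embeddings_and_index() above. The input texts and output path are
# assumptions; adapt them to the actual preprocessing pipeline.
def build_embeddings_sketch(texts, out_path="src/embeddings.pkl"):
    sketch_model = SentenceTransformer('all-MiniLM-L6-v2')  # same model as load_model()
    sketch_embeddings = sketch_model.encode(texts, show_progress_bar=True)
    with open(out_path, "wb") as fOut:
        pickle.dump({"embeddings": sketch_embeddings}, fOut)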

# USE CACHE FUNCTIONS
sim_matrix = load_sim_matrix() # For similarities when matches between same orgas are allowed
nonsameorgas_sim_matrix = load_nonsameorga_sim_matrix()  # For similarities when matches between same orgas are NOT allowed
projects_df, iati_search_list, title_search_list = load_projects()

CRS3_MERGED = getCRS3()
CRS5_MERGED = getCRS5()
SDG_NAMES = getSDG()

# LOAD MODEL FROM CACHE FOR SEMANTIC SEARCH
model = load_model()
embeddings = load_embeddings_and_index()



##################################

def show_landing_page():
    st.title("Development Project Synergy Finder")

    st.subheader("About")
    st.markdown("""
    Multiple international organizations have projects in the same field and region. These projects could collaborate or learn from each other to increase their impact if they were aware of one another. The Project Synergy Finder facilitates the search for similar projects across different development organizations and banks in three distinct ways.
    """)
    st.markdown("<br><br>", unsafe_allow_html=True)  # Add two line breaks

    st.subheader("Pages")
    st.markdown("""
    1. **📊 All Projects**: Displays all projects included in the analysis.
        *Example Use Case*: Show all World Bank and African Development Bank projects in East Africa working towards the Sustainable Development Goal of achieving gender equality.
                

    2. **🎯 Single-Project Matching**: Finds the top similar projects to a selected one.
        *Example Use Case*: Show projects in Eastern Europe that are similar to the "Second Irrigation and Drainage Improvement Project" by the World Bank.
                

    3. **🔍 Multi-Project Matching**: Searches for matching pairs of projects.
        *Example Use Case*: Show pairs of similar projects in the "Energy Policy" sector from different organizations within the same country.
    """)
    st.markdown("<br><br>", unsafe_allow_html=True)  # Add two line breaks

    st.subheader("Data")
    st.markdown("""
    **IATI Data**: The data is sourced from the [IATI d-portal](https://d-portal.org/), providing detailed project-level information. IATI (International Aid Transparency Initiative) is a global initiative to improve the transparency of development and humanitarian resources and their results to address poverty and crises.
    
    **Data Update**: The data is updated irregularly, with the last retrieval on 10th May 2024.
    
    **Project Data**: Includes Project Title, Description, URL, Country, and Sector classification (CRS). The CRS5 and CRS3 classifications organize development aid into categories, with the 5-digit level providing more specific details within the broader 3-digit categories.
    
    **Organizations**: The tool currently includes projects from the following organizations:
    - **IAD**: Inter-American Development Bank
    - **ADB**: Asian Development Bank
    - **AfDB**: African Development Bank
    - **EIB**: European Investment Bank
    - **WB**: World Bank
    - **WBTF**: World Bank Trust Fund
    - **BMZ**: Federal Ministry for Economic Cooperation and Development (Germany)
    - **KfW**: KfW Development Bank (Germany)
    - **GIZ**: Deutsche Gesellschaft für Internationale Zusammenarbeit (Germany)
    - **AA**: German Federal Foreign Office (Germany)
    
    **Additional Data**: The Sustainable Development Goals (SDGs) are 17 UN goals aimed at achieving global sustainability, peace, and prosperity by 2030. The SDG categorization in this tool is AI-predicted based on project descriptions and titles using an [SDG Classifier](https://huggingface.co/jonas/bert-base-uncased-finetuned-sdg) trained on the OSDG dataset.
    """)


##################################


def show_all_projects_page():
    # Define the page size at the beginning
    page_size = 30

    def reset_pagination():
        st.session_state.current_end_idx_all = page_size


    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        st.subheader("Project Filter")

    st.session_state.crs5_option_disabled = True
    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        # CRS 3 SELECTION
        crs3_option = st.multiselect(
            'CRS 3',
            CRS3_MERGED,
            placeholder="Select a CRS 3 code",
            on_change=reset_pagination,
            key='crs3_all_projects_page'
        )

        # CRS 5 SELECTION
        # Only enable crs5 select field when crs3 code is selected
        if crs3_option:
            st.session_state.crs5_option_disabled = False

        # Build the list of CRS-5 options belonging to the selected CRS-3 codes (a 5-digit CRS code starts with its 3-digit parent code)
        crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]

        # crs5 select field
        crs5_option = st.multiselect(
            'CRS 5',
            crs5_list,
            placeholder="Select a CRS 5 code",
            disabled=st.session_state.crs5_option_disabled,
            on_change=reset_pagination,
            key='crs5_all_projects_page'
        )

        # SDG SELECTION
        sdg_option = st.selectbox(
            label='Sustainable Development Goal',
            index=None,
            placeholder="Select an SDG",
            options=SDG_NAMES[:-1],
            on_change=reset_pagination,
            key='sdg_all_projects_page'
        )

    with col3:
        # REGION SELECTION
        region_option = st.multiselect(
            'Regions',
            REGION_OPTION_LIST,
            placeholder="All regions selected",
            on_change=reset_pagination,
            key='regions_all_projects_page'
        )

        # COUNTRY SELECTION
        country_option = st.multiselect(
            'Countries',
            COUNTRY_OPTION_LIST,
            placeholder="All countries selected",
            on_change=reset_pagination,
            key='country_all_projects_page'
        )

        # ORGA SELECTION
        orga_abbreviation = projects_df["orga_abbreviation"].unique()
        orga_full_names = projects_df["orga_full_name"].unique()
        orga_list = [f"{orga_full_names[i]} ({orga_abbreviation[i].upper()})" for i in range(len(orga_abbreviation))]

        orga_option = st.multiselect(
            'Organizations',
            orga_list,
            placeholder="All organizations selected",
            on_change=reset_pagination,
            key='orga_all_projects_page'
        )

    # CRS CODE LIST
    crs3_list = [i[-3:] for i in crs3_option]
    crs5_list = [i[-5:] for i in crs5_option]

    # SDG CODE LIST
    if sdg_option is not None:
        sdg_str = sdg_option.split(".")[0]
    else:
        sdg_str = ""

    # COUNTRY CODES LIST
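    # Each option has the form "Country Name (XX)", so the two characters before the
    # closing parenthesis are the ISO 3166-1 alpha-2 country code.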
    country_code_list = [option[-3:-1] for option in country_option]

    # ORGANIZATION CODES LIST
    orga_code_list = [option.split("(")[1][:-1].lower() for option in orga_option]

    st.write("-----")

    # FILTER DF WITH SELECTED FILTER OPTIONS
    filtered_df = filter_all_projects(projects_df, country_code_list, orga_code_list, crs3_list, crs5_list, sdg_str, region_option)
    if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
        # Implement pagination
        if 'current_end_idx_all' not in st.session_state:
            st.session_state.current_end_idx_all = page_size

        end_idx = st.session_state.current_end_idx_all

        paginated_df = filtered_df.iloc[:end_idx]

        col1, col2 = st.columns([7, 3])
        with col1:
            st.subheader("Filtered Projects")
        with col2:
            # Add download buttons for the filtered results
            def to_excel(df, sheet_name):
                # Rename columns
                df = df.rename(columns={
                    "iati_id": "IATI Identifier",
                    "title_main": "Title",
                    "orga_abbreviation": "Organization",
                    "description_main": "Description",
                    "country_name": "Country",
                    "sdg_list": "SDG List",
                    "crs_3_code_list": "CRS 3 Codes",
                    "crs_5_code_list": "CRS 5 Codes",
                    "Project Link": "Project Link"
                })
                output = io.BytesIO()
                writer = pd.ExcelWriter(output, engine='xlsxwriter')
                df.to_excel(writer, index=False, sheet_name=sheet_name)
                writer.close()
                processed_data = output.getvalue()
                return processed_data

            # Direct download buttons
            columns_to_include = ["iati_id", "title_main", "orga_abbreviation", "description_main", "country_name", "sdg_list", "crs_3_code_list", "crs_5_code_list", "Project Link"]

            with st.expander("Excel Download"):
                # First 15 Results Button
                df_to_download_15 = filtered_df[columns_to_include].head(15)
                excel_data_15 = to_excel(df_to_download_15, "Sheet1")
                st.download_button(label="First 15 Projects", data=excel_data_15, file_name="First_15_All_Projects_Filtered.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

                # All Results Button
                df_to_download_all = filtered_df[columns_to_include]
                excel_data_all = to_excel(df_to_download_all, "Sheet1")
                st.download_button(label="All", data=excel_data_all, file_name="All_All_Projects_Filtered.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
                    
        show_all_projects_table(projects_df, paginated_df)

        st.write(f"Showing 1 to {min(end_idx, len(filtered_df))} of {len(filtered_df)} projects")

        # Center the buttons and place them close together
        col1, col2, col3, col4, col5 = st.columns([2, 1, 1, 1, 2])
        with col2:
            if st.button('Show More', key='show_more'):
                st.session_state.current_end_idx_all = min(end_idx + page_size, len(filtered_df))
                st.experimental_rerun()
        with col4:
            if st.button('Show Less', key='show_less') and end_idx > page_size:
                st.session_state.current_end_idx_all = max(end_idx - page_size, page_size)
                st.experimental_rerun()

    else:
        st.write("-----")
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            st.write("  ")
            st.markdown("<span style='color: red'>There are no results for the applied filter. Try another filter!</span>", unsafe_allow_html=True)

    del crs3_list, crs5_list, sdg_str, filtered_df
    gc.collect()



##################################

def show_single_matching_page():
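    """
    Note on the similarity score shown in the 'Explanation' expander below: the
    scores come from the precomputed similarity matrices, so the weighting is not
    visible in this file. As an illustrative sketch (w_text / w_sdg / w_crs are
    assumed names, not values taken from the matching code):

        score = w_text * cosine(title + description embeddings)
              + w_sdg  * SDG match
              + w_crs  * CRS-3 / CRS-5 code overlap

    with the weights normalized so identical projects score 100 (reported 0-100).
    """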
    # Define the page size at the beginning
    page_size = 15

    def reset_pagination():
        st.session_state.current_end_idx_single = page_size

    with st.expander("Explanation"):
        st.caption("""
                    Single-Project Matching lets you select an individual project by its IATI ID or title and displays the projects most similar to it.
                
                    **Similarity Score**:
                    - Similarity ranges from 0 to 100 (identical projects score 100%), and is calculated based on           
                        - Text similarity of project description and title (MiniLM embeddings & cosine similarity).
                        - Matching of SDGs (AI-predicted).
                        - Matching of CRS-3 & CRS-5 sector codes.
                    - Components are weighted to give a normalized score. 
                    """)

    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        st.subheader("Reference Project")
        st.caption("""
                    Select a reference project either by its title or IATI ID. 
                    """)
    with col3:
        st.subheader("Filters for Similar Projects")
        st.caption("""
                    The filters are applied when searching for similar projects and are independent of the selected reference project.
                """)

    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        search_option = st.selectbox(
            label='Search with project title or IATI ID',
            index=0,
            placeholder=" ",
            options=["Search with IATI ID", "Search with project title"],
            on_change=reset_pagination,
            key='search_option_single'
        )

        if search_option == "Search with IATI ID":
            search_list = iati_search_list
        else:
            search_list = title_search_list

        project_option = st.selectbox(
            label='Search for a project',
            index=None,
            placeholder=" ",
            options=search_list,
            on_change=reset_pagination,
            key='project_option_single'
        )

    with col3:
        orga_abbreviation = projects_df["orga_abbreviation"].unique()
        orga_full_names = projects_df["orga_full_name"].unique()
        orga_list = [f"{orga_full_names[i]} ({orga_abbreviation[i].upper()})" for i in range(len(orga_abbreviation))]

        # REGION SELECTION
        region_option_s = st.multiselect(
            'Regions',
            REGION_OPTION_LIST,
            placeholder="All regions selected",
            on_change=reset_pagination,
            key='regions_single_projects_page'
        )

        country_option_s = st.multiselect(
            'Countries ',
            COUNTRY_OPTION_LIST,
            placeholder="All countries selected ",
            on_change=reset_pagination,
            key='country_option_single'
        )
        orga_option_s = st.multiselect(
            'Organizations',
            orga_list,
            placeholder="All organizations selected ",
            on_change=reset_pagination,
            key='orga_option_single'
        )

        different_orga_checkbox_s = st.checkbox("Only matches between different organizations ", value=True, on_change=reset_pagination, key='different_orga_checkbox_single')

    st.write("-----")

    if project_option:
        selected_project_index = search_list.index(project_option)
        country_code_list = [option[-3:-1] for option in country_option_s]
        orga_code_list = [option.split("(")[1][:-1].lower() for option in orga_option_s]

        TOP_X_PROJECTS = 1000
        with st.spinner('Please wait...'):
            filtered_df_s = filter_single(projects_df, country_code_list, orga_code_list, region_option_s)

        if isinstance(filtered_df_s, pd.DataFrame) and len(filtered_df_s) != 0:
            if different_orga_checkbox_s:
                with st.spinner('Please wait...'):
                    top_projects_df = find_similar(selected_project_index, nonsameorgas_sim_matrix, filtered_df_s, TOP_X_PROJECTS)
            else:
                with st.spinner('Please wait...'):
                    top_projects_df = find_similar(selected_project_index, sim_matrix, filtered_df_s, TOP_X_PROJECTS)

            # Implement show more, show less, and show all functionality
            if 'current_end_idx_single' not in st.session_state:
                st.session_state.current_end_idx_single = page_size

            end_idx = st.session_state.current_end_idx_single

            paginated_df = top_projects_df.iloc[:end_idx]

            # Add download buttons for the matching results
            def to_excel(df, sheet_name):
                # Rename columns
                df = df.rename(columns={
                    "similarity": "Similarity Score",
                    "iati_id": "IATI Identifier",
                    "title_main": "Title",
                    "orga_abbreviation": "Organization",
                    "description_main": "Description",
                    "country_name": "Country",
                    "sdg_list": "SDG List",
                    "crs_3_code_list": "CRS 3 Codes",
                    "crs_5_code_list": "CRS 5 Codes",
                    "Project Link": "Project Link"
                })
                output = io.BytesIO()
                writer = pd.ExcelWriter(output, engine='xlsxwriter')
                df.to_excel(writer, index=False, sheet_name=sheet_name)
                writer.close()
                processed_data = output.getvalue()
                return processed_data

            # Direct download buttons
            columns_to_include = ["similarity", "iati_id", "title_main", "orga_abbreviation", "description_main", "country_name", "sdg_list", "crs_3_code_list", "crs_5_code_list", "Project Link"]

            col1, col2 = st.columns([15, 5])
            with col2:
                with st.expander("Excel Download"):
                    # First 15 Results Button
                    df_to_download_15 = top_projects_df[columns_to_include].head(15)
                    excel_data_15 = to_excel(df_to_download_15, "Sheet1")
                    st.download_button(label="Download first 15 projects", data=excel_data_15, file_name="First_15_Single_Project_Matching_Results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
                    df_to_download_all = top_projects_df[columns_to_include]
                    excel_data_all = to_excel(df_to_download_all, "Sheet1")
                    st.download_button(label="Download All", data=excel_data_all, file_name="All_Single_Project_Matching_Results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
                
            show_single_table(selected_project_index, projects_df, paginated_df)

            st.write(f"Showing 1 to {min(end_idx, len(top_projects_df))} of {len(top_projects_df)} projects")

            # Center the buttons and place them close together
            col1, col2, col3, col4, col5 = st.columns([2, 1, 1, 1, 2])
            with col2:
                if st.button('Show More'):
                    st.session_state.current_end_idx_single = min(end_idx + page_size, len(top_projects_df))
                    st.experimental_rerun()
            with col3:
                if st.button('Show Less') and end_idx > page_size:
                    st.session_state.current_end_idx_single = max(end_idx - page_size, page_size)
                    st.experimental_rerun()
            with col4:
                if st.button('Show All'):
                    st.session_state.current_end_idx_single = len(top_projects_df)
                    st.experimental_rerun()

        else:
            st.write("-----")
            col1, col2, col3 = st.columns([1, 1, 1])
            with col2:
                st.write("  ")
                st.markdown("<span style='color: red'>There are no results for this filter!</span>", unsafe_allow_html=True)
    gc.collect()


##################################
def show_multi_matching_page():
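    """
    Rough flow of this page (the helpers are imported at the top of this file;
    the description of their internals is an assumption based on how they are
    called here):
      1. filter_multi() narrows projects_df according to the selected filters.
      2. same_country_filter() and the organization filter build compare_df,
         the candidate set of partner projects.
      3. calc_multi_matches() returns two DataFrames (p1_df, p2_df) whose
         corresponding rows form one matched project pair, shown side by side.
    """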
    # Define the page size at the beginning
    page_size = 30

    def reset_pagination():
        st.session_state.current_end_idx_multi = page_size

    with st.expander("Explanation"):
        st.caption("""
        Multi-Project Matching helps you find collaboration opportunities by identifying matching (i.e. similar) projects.

        **How It Works**:
        - Filter projects by CRS sector, SDG, country, and organization.
        - Each match displays two similar projects side-by-side.

        **Similarity Score**:
        - Similarity ranges from 0 to 100 (identical projects score 100%) and is calculated based on
            - Text similarity of project description and title (MiniLM embeddings & cosine similarity).
            - Matching of SDGs (AI-predicted).
            - Matching of CRS-3 & CRS-5 sector codes.
        - Components are weighted to give a normalized score.
        """)
    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        st.subheader("Sector Filters")
        st.caption("""
            At least one sector filter must be applied to see results.
        """)
    with col3:
        st.subheader("Additional Filters")

    st.session_state.crs5_option_disabled = True
    col1, col2, col3 = st.columns([10, 1, 10])
    with col1:
        crs3_option = st.multiselect(
            'CRS 3',
            CRS3_MERGED,
            placeholder="Select a CRS 3 code",
            on_change=reset_pagination,
            key='crs3_multi_projects_page'
        )

        if crs3_option:
            st.session_state.crs5_option_disabled = False

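        # Restrict CRS-5 options to children of the selected CRS-3 codes
        # (a 5-digit CRS purpose code starts with the digits of its 3-digit parent).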
        crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]

        crs5_option = st.multiselect(
            'CRS 5',
            crs5_list,
            placeholder="Select a CRS 5 code",
            disabled=st.session_state.crs5_option_disabled,
            on_change=reset_pagination,
            key='crs5_multi_projects_page'
        )

        sdg_option = st.selectbox(
            label='Sustainable Development Goal',
            index=None,
            placeholder="Select an SDG",
            options=SDG_NAMES[:-1],
            on_change=reset_pagination,
            key='sdg_multi_projects_page'
        )

        query = ""

    with col3:
        region_option = st.multiselect(
            'Regions',
            REGION_OPTION_LIST,
            placeholder="All regions selected",
            on_change=reset_pagination,
            key='regions_multi_projects_page'
        )
        country_option = st.multiselect(
            'Countries',
            COUNTRY_OPTION_LIST,
            placeholder="All countries selected",
            on_change=reset_pagination,
            key='country_multi_projects_page'
        )

        orga_abbreviation = projects_df["orga_abbreviation"].unique()
        orga_full_names = projects_df["orga_full_name"].unique()
        orga_list = [f"{orga_full_names[i]} ({orga_abbreviation[i].upper()})" for i in range(len(orga_abbreviation))]

        orga_option = st.multiselect(
            'Organizations',
            orga_list,
            placeholder="All organizations selected",
            on_change=reset_pagination,
            key='orga_multi_projects_page'
        )

        identical_country_checkbox = st.checkbox("Only matches where country is identical", value=True, on_change=reset_pagination, key='identical_country_checkbox_multi')
        different_orga_checkbox = st.checkbox("Only matches between different organizations", value=True, on_change=reset_pagination, key='different_orga_checkbox_multi')
        filtered_country_only_checkbox = st.checkbox("Only matches between filtered countries", value=True, on_change=reset_pagination, key='filtered_country_only_checkbox_multi')
        filtered_orga_only_checkbox = st.checkbox("Only matches between filtered organizations", value=True, on_change=reset_pagination, key='filtered_orga_only_checkbox_multi')


    # CRS CODE LIST
    crs3_list = [i[-3:] for i in crs3_option]
    crs5_list = [i[-5:] for i in crs5_option]

    # SDG CODE LIST
    sdg_str = sdg_option.split(".")[0] if sdg_option else ""

    # COUNTRY CODES LIST
    country_code_list = [option[-3:-1] for option in country_option]

    # ORGANIZATION CODES LIST
    orga_code_list = [option.split("(")[1][:-1].lower() for option in orga_option]

    # Handle case where no organizations are selected but the checkbox is checked
    if filtered_orga_only_checkbox and not orga_code_list:
        orga_code_list = projects_df["orga_abbreviation"].unique().tolist()

    # FILTER DF WITH SELECTED FILTER OPTIONS
    TOP_X_PROJECTS = 2000
    filtered_df = filter_multi(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, region_option, query, model, embeddings, TOP_X_PROJECTS)
    if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
        # FIND MATCHES
        # If the "only matches between filtered countries" checkbox is activated
        if filtered_country_only_checkbox:
            with st.spinner('Please wait...'):
                compare_df = same_country_filter(projects_df, country_code_list)
        else:
            compare_df = projects_df

        if filtered_orga_only_checkbox:
            compare_df = compare_df[compare_df['orga_abbreviation'].isin(orga_code_list)]

        # Use the similarity matrix without same-organization matches if the "different organizations" checkbox is activated
        with st.spinner('Please wait...'):
            p1_df, p2_df = calc_multi_matches(filtered_df, compare_df, nonsameorgas_sim_matrix if different_orga_checkbox else sim_matrix, TOP_X_PROJECTS, identical_country=identical_country_checkbox)

        # Sort by similarity before pagination
        p1_df = p1_df.sort_values(by='similarity', ascending=False)
        p2_df = p2_df.sort_values(by='similarity', ascending=False)

        # Implement pagination
        if 'current_end_idx_multi' not in st.session_state:
            st.session_state.current_end_idx_multi = page_size

        end_idx = st.session_state.current_end_idx_multi

        paginated_p1_df = p1_df.iloc[:end_idx]
        paginated_p2_df = p2_df.iloc[:end_idx]

        if not paginated_p1_df.empty and not paginated_p2_df.empty:
            col1, col2 = st.columns([10, 2])
            with col1:
                st.subheader("Matched Projects")
            with col2:
                # Add download buttons for the matched project pairs
                def to_excel(p1_df, p2_df, sheet_name):
                    # Rename columns
                    p1_df = p1_df.rename(columns={
                        "similarity": "Similarity Score",
                        "iati_id": "IATI Identifier",
                        "title_main": "Title",
                        "orga_abbreviation": "Organization",
                        "description_main": "Description",
                        "country_name": "Country",
                        "sdg_list": "SDG List",
                        "crs_3_code_list": "CRS 3 Codes",
                        "crs_5_code_list": "CRS 5 Codes",
                        "Project Link": "Project Link"
                    })
                    p2_df = p2_df.rename(columns={
                        "similarity": "Similarity Score",
                        "iati_id": "IATI Identifier",
                        "title_main": "Title",
                        "orga_abbreviation": "Organization",
                        "description_main": "Description",
                        "country_name": "Country",
                        "sdg_list": "SDG List",
                        "crs_3_code_list": "CRS 3 Codes",
                        "crs_5_code_list": "CRS 5 Codes",
                        "Project Link": "Project Link"
                    })
                    
                    # Interleave the p1/p2 rows pairwise, separating each pair with an empty row
                    empty_row = pd.DataFrame([{}])
                    combined_df_list = []
                    
                    for idx in range(0, len(p1_df), 2):
                        combined_df_list.append(p1_df.iloc[[idx]])
                        combined_df_list.append(p2_df.iloc[[idx]])
                        combined_df_list.append(empty_row)
                    
                    combined_df = pd.concat(combined_df_list, ignore_index=True)
                    
                    output = io.BytesIO()
                    writer = pd.ExcelWriter(output, engine='xlsxwriter')
                    combined_df.to_excel(writer, index=False, sheet_name=sheet_name)
                    writer.close()
                    processed_data = output.getvalue()
                    return processed_data

                # Direct download buttons
                columns_to_include = ["similarity", "iati_id", "title_main", "orga_abbreviation", "description_main", "country_name", "sdg_list", "crs_3_code_list", "crs_5_code_list", "Project Link"]

                with st.expander("Excel Download"):
                    # First 15 Results Button
                    p1_df_to_download_15 = p1_df[columns_to_include].head(30)
                    p2_df_to_download_15 = p2_df[columns_to_include].head(30)
                    excel_data_15 = to_excel(p1_df_to_download_15, p2_df_to_download_15, "Sheet1")
                    st.download_button(label="First 15 Matches", data=excel_data_15, file_name="First_15_Multi_Projects_Matching_Results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

                    # All Results Button
                    p1_df_to_download_all = p1_df[columns_to_include]
                    p2_df_to_download_all = p2_df[columns_to_include]
                    excel_data_all = to_excel(p1_df_to_download_all, p2_df_to_download_all, "Sheet1")
                    st.download_button(label="All", data=excel_data_all, file_name="All_Multi_Projects_Matching_Results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

            show_multi_table(paginated_p1_df, paginated_p2_df)

            st.write(f"Showing 1 to {min(end_idx // 2, len(p1_df) // 2)} of {len(p1_df) // 2} matches")

            # Center the buttons and place them close together
            col1, col2, col3, col4, col5 = st.columns([2, 1, 1, 1, 2])
            with col2:
                if st.button('Show More', key='show_more_button'):
                    st.session_state.current_end_idx_multi = min(end_idx + page_size, len(p1_df))
                    st.experimental_rerun()
            with col3:
                if st.button('Show Less', key='show_less_button') and end_idx > page_size:
                    st.session_state.current_end_idx_multi = max(end_idx - page_size, page_size)
                    st.experimental_rerun()
            with col4:
                if st.button('Show All', key='show_all_button'):
                    st.session_state.current_end_idx_multi = len(p1_df)
                    st.experimental_rerun()

            del p1_df, p2_df
        else:
            st.write("-----")
            col1, col2, col3 = st.columns([1, 1, 1])
            with col2:
                st.write("  ")
                st.markdown("<span style='color: red'>There are no results for the applied filter. Try another filter!</span>", unsafe_allow_html=True)

    else:
        st.write("-----")
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            st.write("  ")
            st.markdown("<span style='color: red'>There are no results for the applied filter. Try another filter!</span>", unsafe_allow_html=True)

    del crs3_list, crs5_list, sdg_str, filtered_df
    gc.collect()