File size: 1,736 Bytes
57a5237
71de22d
57a5237
 
 
 
 
71de22d
e3302f1
71de22d
e3302f1
 
55a6bd8
 
 
 
 
 
 
e3302f1
55a6bd8
 
57a5237
e3302f1
 
 
 
 
7823114
e3302f1
 
 
 
 
 
 
71de22d
 
 
 
 
 
 
e3302f1
55a6bd8
a272945
55a6bd8
57a5237
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
from functions.semantic_search import search

def contains_code(crs_codes, code_list):
    """Tell whether any ';'-separated code in *crs_codes* is in *code_list*.

    *crs_codes* is converted with ``str()`` before splitting, so non-string
    values (e.g. an int or NaN) are compared by their string form. Pieces are
    not stripped of whitespace; membership is an exact match.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False

def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Narrow *df* down to the projects matching the selected filters.

    Filters are applied in this order: CRS codes, SDG, country, organization,
    and finally the free-text query.

    Args:
        df: DataFrame of projects. The columns read here are ``crs_3_code``,
            ``crs_5_code``, ``sgd_pred_code``, ``country`` and
            ``orga_abbreviation``.
        crs3_list: Selected CRS-3 codes; only used when *crs5_list* is empty.
        crs5_list: Selected CRS-5 codes; takes precedence over *crs3_list*.
        sdg_str: SDG number as a string, or ``""`` for no SDG filter. It is
            converted with ``int()``, so a non-numeric value raises
            ``ValueError``.
        country_code_list: Country codes; a row is kept when its ``country``
            value contains at least one of them (``Series.str.contains``
            with its default pattern handling).
        orga_code_list: Organization abbreviations matched exactly against
            ``orga_abbreviation``.
        query: Free-text query, or ``""`` to skip the search step.
        model: Passed through unchanged to ``search``.
        embeddings: Passed through unchanged to ``search``.
        TOP_X_PROJECTS: Upper bound on the result count requested from
            ``search``; capped to the number of rows left after filtering.

    Returns:
        The filtered DataFrame (or whatever ``search`` returns when a query
        is given), or ``None`` when none of the CRS, SDG or query filters is
        set. Country and organization filters alone do not trigger
        filtering.
    """
    # Without at least one of these filters nothing is returned at all.
    if crs3_list == [] and crs5_list == [] and sdg_str == "" and query == "":
        return None

    # FILTER CRS: CRS-5 codes are the more specific selection, so they win
    # whenever they are present; CRS-3 is only used on its own.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str != "":
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    if country_code_list != []:
        # OR the per-country matches into one mask so that a project matching
        # several selected countries is kept once (not once per country) and
        # the original row order and index are preserved.
        country_col = df["country"]
        country_mask = pd.Series(False, index=df.index, dtype=bool)
        for code in country_code_list:
            country_mask |= country_col.str.contains(code, na=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list != []:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # FILTER QUERY
    if query != "" and len(df) > 0:
        # Never ask search for more results than there are rows left.
        top_k = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_k)

    return df