import pandas as pd

from functions.semantic_search import search


def contains_code(crs_codes, code_list):
    """Return True if any ';'-separated code in *crs_codes* is in *code_list*.

    Args:
        crs_codes: A cell value holding one or more codes joined by ';'.
            The value is converted with ``str()`` first, so non-string
            cells are compared by their string form.
        code_list: Container of accepted codes. Codes are compared as
            strings exactly as split (no whitespace stripping).

    Returns:
        bool: True when at least one split code is contained in *code_list*.
    """
    codes = str(crs_codes).split(';')
    return any(code in code_list for code in codes)


def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Filter the project table by the user's selections and optional query.

    Country and organization selections are only applied when at least one
    of the CRS, SDG or query selections is set; otherwise *df* is returned
    unchanged.

    Args:
        df: Project DataFrame. The code reads the columns ``crs_3_code``,
            ``crs_5_code``, ``sgd_pred_code``, ``country`` and
            ``orga_abbreviation``, depending on which filters are active.
        crs3_list: Selected 3-digit CRS codes (empty/None = no selection).
        crs5_list: Selected 5-digit CRS codes. When given, they take
            precedence over *crs3_list*.
        sdg_str: Selected SDG as a string convertible with ``int()``, or ""
            for no selection.
        country_code_list: Selected country codes; a row matches when its
            ``country`` value contains any of them.
        orga_code_list: Selected organization abbreviations.
        query: Free-text query passed to ``search``; "" disables the search.
        model: Passed through to ``search`` unchanged.
        embeddings: Passed through to ``search`` unchanged.
        TOP_X_PROJECTS: Upper bound on the number of results requested from
            ``search``; capped at the number of remaining rows.

    Returns:
        The filtered DataFrame, or the result of ``search`` when a query is
        given and rows remain (presumably a DataFrame of the top matches --
        confirm against ``functions.semantic_search.search``).

    Raises:
        ValueError: If *sdg_str* is non-empty and not convertible to int.
    """
    # Guard: without a CRS, SDG or query selection nothing is filtered, even
    # if countries or organizations were chosen (kept from the original logic).
    if not (crs3_list or crs5_list or sdg_str or query):
        return df

    # FILTER CRS: the more specific 5-digit selection wins over the 3-digit one.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    # NOTE: the column key is spelled "sgd_pred_code"; it must match the data.
    if sdg_str:
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    # Build one OR-combined mask instead of concatenating one frame per
    # country: a row matching several selected codes is kept exactly once,
    # and the original row order and index are preserved.
    if country_code_list:
        country_col = df["country"]
        mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            # Default pattern semantics of str.contains are kept on purpose so
            # the set of matching rows is the same as before.
            mask = mask | country_col.str.contains(code, na=False)
        df = df[mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # FILTER QUERY: never request more results than there are rows left.
    if query and len(df) > 0:
        top_x = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_x)

    return df