Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import pandas as pd | |
from functions.semantic_search import search | |
def contains_code(crs_codes, code_list):
    """Return True if any ';'-separated code in ``crs_codes`` is in ``code_list``.

    ``crs_codes`` is converted with ``str()`` before splitting, so non-string
    values are compared by their string form. Codes are compared exactly as
    split; surrounding whitespace is not stripped.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Narrow the project table ``df`` by the selected filters.

    Filters are applied in this order: CRS code, SDG, country, organization,
    free-text query. An empty (or ``None``) filter value is skipped.

    Args:
        df: DataFrame providing the columns ``crs_3_code``, ``crs_5_code``,
            ``sgd_pred_code``, ``country`` and ``orga_abbreviation``.
        crs3_list: Codes matched against the ';'-separated ``crs_3_code``
            column. Only used when ``crs5_list`` is empty.
        crs5_list: Codes matched against the ';'-separated ``crs_5_code``
            column. Takes precedence over ``crs3_list`` when both are given.
        sdg_str: SDG number as a string (converted with ``int()``), or ``""``
            for no SDG filter.
        country_code_list: Codes matched as literal substrings of the
            ``country`` column; a row is kept if it contains any of them.
        orga_code_list: Values matched exactly against ``orga_abbreviation``.
        query: Free-text query forwarded to ``search``; ``""`` disables it.
        model: Forwarded unchanged to ``search``.
        embeddings: Forwarded unchanged to ``search``.
        TOP_X_PROJECTS: Upper bound on the number of projects requested from
            ``search``; capped at the number of rows left after filtering.

    Returns:
        The filtered DataFrame when at least one of ``crs3_list``,
        ``crs5_list``, ``sdg_str`` or ``query`` is set. When a query is given
        and rows remain, this is whatever ``search`` returns.

        NOTE(review): the original indentation of this function was lost.
        It is assumed that the whole body, including the ``return``, sits
        under the outer ``if``, so nothing is returned explicitly when none
        of those four filters is set - confirm against the caller.
    """
    # Only filter when at least one of the filters for which "nothing
    # selected" must not mean "show every project" is set.
    if crs3_list or crs5_list or sdg_str or query:
        # FILTER CRS: the CRS-5 selection wins whenever it is present.
        if crs5_list:
            df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
        elif crs3_list:
            df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

        # FILTER SDG
        if sdg_str:
            df = df[df["sgd_pred_code"] == int(sdg_str)]

        # FILTER COUNTRY
        if country_code_list:
            # OR the per-code matches into one mask so that a row whose
            # country field contains several selected codes is kept once,
            # instead of once per matching code. Rows keep the order of df.
            country_mask = pd.Series(False, index=df.index)
            for code in country_code_list:
                # regex=False: codes are literal text, not patterns.
                country_mask = country_mask | df["country"].str.contains(code, na=False, regex=False)
            df = df[country_mask]

        # FILTER ORGANIZATION
        if orga_code_list:
            df = df[df['orga_abbreviation'].isin(orga_code_list)]

        # FILTER QUERY
        if query and len(df) > 0:
            # Never request more projects than there are rows left.
            top_k = min(TOP_X_PROJECTS, len(df))
            df = search(query, model, embeddings, df, top_k)

        return df