Jan Mühlnikel
test ram
a272945
raw
history blame
1.74 kB
import pandas as pd
from functions.semantic_search import search
def contains_code(crs_codes, code_list):
    """Return True if any code in a ';'-separated field appears in *code_list*.

    Args:
        crs_codes: Cell value holding one or more codes separated by ';'.
            The value is converted with ``str()`` first, so non-string
            values (for example an int code) are accepted.
        code_list: Collection of code strings to look for.

    Returns:
        bool: True when at least one code from *crs_codes* is contained in
        *code_list*, otherwise False.
    """
    # Strip each piece so that "111; 112" still matches "112" despite the
    # padding around the separator.
    return any(code.strip() in code_list for code in str(crs_codes).split(';'))
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Filter the project table by the selected options.

    Filters are applied in this order: CRS code, SDG, country, organization,
    and finally the free-text query via ``search``.

    NOTE(review): the indentation of this function was lost in the source
    this was reconstructed from. Based on the original guard comment, all
    filters and the final ``return df`` are assumed to sit inside the
    "at least one CRS/SDG/query filter is set" check -- confirm against the
    callers.

    Args:
        df: DataFrame of projects. The columns read here are 'crs_3_code',
            'crs_5_code', 'sgd_pred_code', 'country' and 'orga_abbreviation'.
        crs3_list: Selected CRS3 codes; only used when crs5_list is empty.
        crs5_list: Selected CRS5 codes; takes precedence over crs3_list.
        sdg_str: Selected SDG number as a string, or "" for no SDG filter.
        country_code_list: Patterns matched against the 'country' column with
            ``Series.str.contains``; a row is kept if it matches any of them.
        orga_code_list: Values of 'orga_abbreviation' to keep.
        query: Free-text query passed to ``search``, or "" to skip it.
        model: Passed through to ``search`` unchanged.
        embeddings: Passed through to ``search`` unchanged.
        TOP_X_PROJECTS: Upper bound on the number of results requested from
            ``search``; capped at the number of rows left after filtering.

    Returns:
        The filtered DataFrame (the result of ``search`` when a query is
        given), or None when none of crs3_list, crs5_list, sdg_str and query
        is set.
    """
    # CRS, SDG and query are the filters where "select everything" is not
    # meaningful; without at least one of them nothing is returned.
    if not (crs3_list or crs5_list or sdg_str or query):
        return None

    # FILTER CRS
    # CRS5 is the finer-grained selection, so it wins whenever it is set.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str:
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    if country_code_list:
        # OR the per-country masks together instead of concatenating one
        # slice per country: a project matching several selected countries
        # is kept once, and the original row order and index are preserved.
        country_column = df["country"]
        matches_any = None
        for code in country_code_list:
            hit = country_column.str.contains(code, na=False)
            matches_any = hit if matches_any is None else (matches_any | hit)
        df = df[matches_any]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # FILTER QUERY
    if query and len(df) > 0:
        # Never ask for more results than there are rows left.
        top_x = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_x)

    return df