Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Merge branch 'main' of https://huggingface.co/spaces/GIZ/eb-synergy-app
Browse files- functions/filter_projects.py +49 -0
- modules/navbar.py +40 -0
- requirements.txt +9 -0
- similarity_page.py +3 -1
functions/filter_projects.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from functions.semantic_search import search
|
3 |
+
|
4 |
+
def contains_code(crs_codes, code_list):
    """Tell whether any ';'-separated code in *crs_codes* occurs in *code_list*.

    *crs_codes* is converted with ``str()`` before splitting, so non-string
    values are compared by their string form.

    Returns:
        True if at least one of the split codes is in *code_list*, else False.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False
|
7 |
+
|
8 |
+
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Narrow ``df`` down to the projects matching the selected filters.

    NOTE(review): the nesting below was reconstructed from a flattened diff
    view; confirm against the repository that every filter sits inside the
    outer guard and that ``return df`` is at function level.

    Args:
        df: DataFrame of projects. The columns ``crs_3_code``,
            ``crs_5_code``, ``sgd_pred_code``, ``country`` and
            ``orga_abbreviation`` are read, each only when its filter is set.
        crs3_list: CRS-3 codes to keep; only used when ``crs5_list`` is empty.
        crs5_list: CRS-5 codes to keep; takes precedence over ``crs3_list``.
        sdg_str: SDG number as a string, or ``""`` for no SDG filter.
        country_code_list: Patterns matched against ``country`` with
            ``str.contains``; a row is kept if any of them matches.
        orga_code_list: Allowed values of ``orga_abbreviation``.
        query: Free-text query handed to ``search``; ``""`` skips the search.
        model: Passed through to ``search`` unchanged.
        embeddings: Passed through to ``search`` unchanged.
        TOP_X_PROJECTS: Upper bound on the number of results requested from
            ``search``; capped at the number of rows left after filtering.

    Returns:
        The filtered DataFrame (or whatever ``search`` returns when a query
        is given); ``df`` unchanged when no CRS, SDG or query filter is set.

    Raises:
        ValueError: If ``sdg_str`` is non-empty and not an integer literal.
    """
    # Country and organisation selections alone do not trigger filtering:
    # at least one of the CRS, SDG or query filters has to be set.
    if crs3_list or crs5_list or sdg_str != "" or query != "":

        # FILTER CRS -- a CRS-5 selection takes precedence over CRS-3.
        if crs5_list:
            df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
        elif crs3_list:
            df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

        # FILTER SDG
        if sdg_str != "":
            df = df[df["sgd_pred_code"] == int(sdg_str)]

        # FILTER COUNTRY
        if country_code_list:
            # OR the per-country matches into one mask. Concatenating one
            # subset per country would return a row once for every selected
            # country it matches and would reorder the rows.
            country_mask = pd.Series(False, index=df.index)
            for country_code in country_code_list:
                country_mask = country_mask | df["country"].str.contains(country_code, na=False)
            df = df[country_mask]

        # FILTER ORGANIZATION
        if orga_code_list:
            df = df[df['orga_abbreviation'].isin(orga_code_list)]

        # FILTER QUERY
        if query != "" and len(df) > 0:
            # Never request more results than there are rows left.
            top_k = min(TOP_X_PROJECTS, len(df))
            df = search(query, model, embeddings, df, top_k)

    return df
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
modules/navbar.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
|
3 |
+
import similarity_page
|
4 |
+
|
5 |
+
# giz-dsc colors
|
6 |
+
# orange: #e5b50d
|
7 |
+
# green: #48d47b
|
8 |
+
# blue: #0da2dc
|
9 |
+
# grey: #dadada
|
10 |
+
|
11 |
+
# giz colors https://www.giz.de/cdc/en/html/59638.html
|
12 |
+
# red: #c80f0f
|
13 |
+
# grey: #6f6f6f
|
14 |
+
# light_grey: #b2b2b2
|
15 |
+
# light_red: #eba1a3
|
16 |
+
|
17 |
+
def show_navbar():
    """Render the two matching tabs and the page that belongs to each.

    Injects a CSS snippet that sets the tab-label font size, creates the
    multi-project and single-project tabs, and calls the corresponding
    ``similarity_page`` view inside each tab.
    """
    # Enlarge the tab labels.
    # NOTE(review): whitespace inside this literal follows the flattened
    # source view; confirm against the repository.
    tab_label_css = '''
<style>
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
font-size:1rem;
}
</style>
'''
    st.markdown(tab_label_css, unsafe_allow_html=True)

    tab_labels = [
        "🔍 Multi-Project Matching",
        "🎯 Single-Project Matching",
    ]
    multi_tab, single_tab = st.tabs(tab_labels)

    with multi_tab:
        similarity_page.show_multi_matching_page()

    with single_tab:
        similarity_page.show_single_matching_page()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.26.4
|
2 |
+
pandas==2.1.4
|
3 |
+
streamlit==1.32.2
|
4 |
+
streamlit-option-menu==0.3.12
|
5 |
+
scipy==1.12.0
|
6 |
+
faiss-cpu==1.8.0
|
7 |
+
faiss-gpu==1.7.2
|
8 |
+
sentence-transformers==2.5.1
|
9 |
+
streamlit-aggrid==0.3.4
|
similarity_page.py
CHANGED
@@ -17,13 +17,15 @@ from functions.filter_single import filter_single
|
|
17 |
from functions.calc_matches import calc_matches
|
18 |
from functions.same_country_filter import same_country_filter
|
19 |
from functions.single_similar import find_similar
|
20 |
-
import psutil
|
21 |
import os
|
22 |
import gc
|
23 |
|
|
|
24 |
def get_process_memory():
    """Return the current process's ``memory_info().rss`` divided by 1024 * 1024.

    psutil reports ``rss`` in bytes, so the result is in MiB.
    """
    bytes_per_mib = 1024 * 1024
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mib
|
|
|
27 |
|
28 |
# Catch DATA
|
29 |
# Load Similarity matrix
|
|
|
17 |
from functions.calc_matches import calc_matches
|
18 |
from functions.same_country_filter import same_country_filter
|
19 |
from functions.single_similar import find_similar
|
20 |
+
#import psutil
|
21 |
import os
|
22 |
import gc
|
23 |
|
24 |
+
"""
|
25 |
def get_process_memory():
|
26 |
process = psutil.Process(os.getpid())
|
27 |
return process.memory_info().rss / (1024 * 1024)
|
28 |
+
"""
|
29 |
|
30 |
# Catch DATA
|
31 |
# Load Similarity matrix
|