Jan Mühlnikel committed on
Commit
1de85ab
1 Parent(s): 1693bac
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +0 -36
  2. .streamlit/config.toml +0 -14
  3. README.md +0 -13
  4. __pycache__/app.cpython-310.pyc +0 -0
  5. __pycache__/crs.cpython-310.pyc +0 -0
  6. __pycache__/home.cpython-310.pyc +0 -0
  7. __pycache__/sector.cpython-310.pyc +0 -0
  8. __pycache__/similarity.cpython-310.pyc +0 -0
  9. __pycache__/similarity_page.cpython-310.pyc +0 -0
  10. app.py +0 -1
  11. functions/__pycache__/calc_matches.cpython-310.pyc +0 -0
  12. functions/__pycache__/filter_projects.cpython-310.pyc +0 -0
  13. functions/__pycache__/semantic_search.cpython-310.pyc +0 -0
  14. functions/__pycache__/single_similar.cpython-310.pyc +0 -0
  15. functions/calc_matches.py +0 -37
  16. functions/different_orga_filter.py +0 -12
  17. functions/filter_projects.py +0 -54
  18. functions/filter_single.py +0 -22
  19. functions/same_country_filter.py +0 -16
  20. functions/semantic_search.py +0 -27
  21. functions/single_similar.py +0 -25
  22. modules/__pycache__/crs_table.cpython-310.pyc +0 -0
  23. modules/__pycache__/filter_modules.cpython-310.pyc +0 -0
  24. modules/__pycache__/filter_projects.cpython-310.pyc +0 -0
  25. modules/__pycache__/navbar.cpython-310.pyc +0 -0
  26. modules/__pycache__/result_table.cpython-310.pyc +0 -0
  27. modules/__pycache__/sdg_table.cpython-310.pyc +0 -0
  28. modules/__pycache__/semantic_search.cpython-310.pyc +0 -0
  29. modules/__pycache__/similarity_table.cpython-310.pyc +0 -0
  30. modules/multimatch_result_table.py +0 -134
  31. modules/navbar.py +0 -39
  32. requirements.txt +0 -10
  33. similarity_page.py +4 -10
  34. src/codelists/country_codes_ISO3166-1alpha-2.csv +0 -3
  35. src/codelists/crs3_codes.csv +0 -3
  36. src/codelists/crs5_codes.csv +0 -3
  37. src/codelists/flags/AC.png +0 -0
  38. src/codelists/flags/AD.png +0 -0
  39. src/codelists/flags/AE.png +0 -0
  40. src/codelists/flags/AF.png +0 -0
  41. src/codelists/flags/AG.png +0 -0
  42. src/codelists/flags/AI-alt.png +0 -0
  43. src/codelists/flags/AI.png +0 -0
  44. src/codelists/flags/AL.png +0 -0
  45. src/codelists/flags/AM.png +0 -0
  46. src/codelists/flags/AO.png +0 -0
  47. src/codelists/flags/AQ.png +0 -0
  48. src/codelists/flags/AR.png +0 -0
  49. src/codelists/flags/AS.png +0 -0
  50. src/codelists/flags/AT.png +0 -0
.gitattributes DELETED
@@ -1,36 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- *.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.streamlit/config.toml DELETED
@@ -1,14 +0,0 @@
1
- [global]
2
-
3
- [server]
4
- headless = true
5
-
6
- [client]
7
- initialSidebarState = "expanded"
8
-
9
- [theme]
10
- primaryColor="#c30f08"
11
- backgroundColor="#ffffff"
12
- secondaryBackgroundColor="#eceded"
13
- textColor="#000000"
14
- font="sans serif"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Development Banks Collaboration Analyzer
3
- emoji: 🐢
4
- colorFrom: pink
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.32.2
8
- app_file: app.py
9
- pinned: true
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/app.cpython-310.pyc DELETED
Binary file (664 Bytes)
 
__pycache__/crs.cpython-310.pyc DELETED
Binary file (3.71 kB)
 
__pycache__/home.cpython-310.pyc DELETED
Binary file (447 Bytes)
 
__pycache__/sector.cpython-310.pyc DELETED
Binary file (6.06 kB)
 
__pycache__/similarity.cpython-310.pyc DELETED
Binary file (3.66 kB)
 
__pycache__/similarity_page.cpython-310.pyc DELETED
Binary file (9.48 kB)
 
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
-
3
  # PAGE CONFIG
4
  st.set_page_config(
5
  page_title='Development Banks Collaboration Analyzer',
 
1
  import streamlit as st
 
2
  # PAGE CONFIG
3
  st.set_page_config(
4
  page_title='Development Banks Collaboration Analyzer',
functions/__pycache__/calc_matches.cpython-310.pyc DELETED
Binary file (922 Bytes)
 
functions/__pycache__/filter_projects.cpython-310.pyc DELETED
Binary file (1.81 kB)
 
functions/__pycache__/semantic_search.cpython-310.pyc DELETED
Binary file (1.07 kB)
 
functions/__pycache__/single_similar.cpython-310.pyc DELETED
Binary file (672 Bytes)
 
functions/calc_matches.py DELETED
@@ -1,37 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
    """Return the ``top_x`` most similar (project1, project2) pairs.

    Rows of the match matrix are the projects in ``filtered_df`` (project1)
    and columns are the projects in ``project_df`` (project2).  The index
    labels of both frames are used directly as row/column positions in
    ``similarity_matrix``.

    Args:
        filtered_df: Candidate projects for the first side of a match.
        project_df: Candidate projects for the second side of a match.
        similarity_matrix: Pairwise similarity array indexed as
            ``[project1, project2]``.  It is left unmodified.
        top_x: Maximum number of pairs to return; values below 1 yield
            empty frames.

    Returns:
        ``(p1_df, p2_df)``: two frames of equal length, aligned row by row,
        each carrying an added ``similarity`` column.  Pairs are ordered by
        ascending similarity, i.e. the best match is the last row.
    """
    row_labels = filtered_df.index
    col_labels = project_df.index

    # Fancy indexing produces a copy, so the caller's (possibly cached and
    # shared) matrix is never written to.
    match_matrix = similarity_matrix[row_labels, :][:, col_labels]

    # A project must not match itself: zero every cell whose row and column
    # refer to the same project.  This is the sliced equivalent of zeroing
    # the diagonal of the full matrix, without mutating the input.
    same_project = np.asarray(row_labels)[:, None] == np.asarray(col_labels)[None, :]
    match_matrix[same_project] = 0

    ranked = np.argsort(match_matrix, axis=None)
    top_x = max(0, min(top_x, len(ranked)))
    if top_x == 0:
        # ``ranked[-0:]`` would select *every* pair, so handle "no pairs"
        # explicitly.
        no_scores = match_matrix.ravel()[:0]
        return (
            filtered_df.iloc[:0].assign(similarity=no_scores),
            project_df.iloc[:0].assign(similarity=no_scores),
        )

    # Row (project1) and column (project2) positions of the highest scores.
    top_rows, top_cols = np.unravel_index(ranked[-top_x:], match_matrix.shape)
    top_values = match_matrix[top_rows, top_cols]

    # ``assign`` returns new frames instead of writing into an ``iloc`` slice.
    p1_df = filtered_df.iloc[top_rows].assign(similarity=top_values)
    p2_df = project_df.iloc[top_cols].assign(similarity=top_values)

    return p1_df, p2_df
34
-
35
-
36
-
37
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/different_orga_filter.py DELETED
@@ -1,12 +0,0 @@
1
- import pandas as pd
2
-
3
def different_orga_filter(df, orga):
    """Keep only the projects that do not belong to organization ``orga``.

    NOTE(review): the previous body was a copy of the country filter. It
    iterated over an undefined ``country_code_list`` (a NameError on every
    call) and never used ``orga``.  The behaviour implemented here is
    inferred from the function name and from the ``orga_abbreviation``
    column that the organization filters in this code base use -- confirm
    against the intended caller.

    Args:
        df: Project frame with an ``orga_abbreviation`` column.
        orga: Organization abbreviation to exclude.

    Returns:
        The rows of ``df`` whose ``orga_abbreviation`` differs from ``orga``,
        with their original index labels.
    """
    return df[df["orga_abbreviation"] != orga]
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/filter_projects.py DELETED
@@ -1,54 +0,0 @@
1
- import pandas as pd
2
- from functions.semantic_search import search
3
-
4
def contains_code(crs_codes, code_list):
    """Return True if any ``;``-separated code in ``crs_codes`` is in ``code_list``."""
    return any(code in code_list for code in str(crs_codes).split(';'))


def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
                    model,
                    TOP_X_PROJECTS=30):
    """Filter the project frame by CRS code, SDG, country and organization.

    At least one of the CRS 3 / CRS 5 / SDG filters must be set; otherwise
    nothing is filtered and ``None`` is returned.
    NOTE(review): the ``None`` result for "no CRS/SDG filter" follows the
    reconstructed nesting of the original and the caller's
    ``isinstance(..., pd.DataFrame)`` check -- confirm.

    Args:
        df: Project frame with ``crs_3_code``, ``crs_5_code``,
            ``sgd_pred_code``, ``country`` and ``orga_abbreviation`` columns.
        crs3_list: Selected CRS 3 codes; only applied when no CRS 5 code is
            selected.
        crs5_list: Selected CRS 5 codes; take precedence over ``crs3_list``.
        sdg_str: SDG number as a string, or ``""`` for no SDG filter.
        country_code_list: Country codes; a project is kept when its
            ``country`` string contains any of them.  Empty means no filter.
        orga_code_list: Organization abbreviations to keep.  Empty means no
            filter.
        model: Unused here; kept for the (currently disabled) semantic query
            search.
        TOP_X_PROJECTS: Unused here; kept for the (currently disabled)
            semantic query search.

    Returns:
        The filtered frame, or ``None`` when no CRS/SDG filter is set.
    """
    if not (crs3_list or crs5_list or sdg_str):
        return None

    # FILTER CRS -- the more specific CRS 5 selection wins over CRS 3.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str:
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    # One boolean mask over all codes keeps each project once, even when its
    # country string contains several of the selected codes, and preserves
    # the original row order.
    if country_code_list:
        country_mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            # Codes are literal substrings, not regular expressions.
            country_mask = country_mask | df["country"].str.contains(code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # Semantic query search is currently disabled.
    return df
51
-
52
-
53
-
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/filter_single.py DELETED
@@ -1,22 +0,0 @@
1
- import pandas as pd
2
- from functions.semantic_search import search
3
-
4
def contains_code(crs_codes, code_list):
    """Return True if any ``;``-separated code in ``crs_codes`` is in ``code_list``."""
    return any(code in code_list for code in str(crs_codes).split(';'))


def filter_single(df, country_code_list, orga_code_list):
    """Filter the project frame by country and organization.

    Args:
        df: Project frame with ``country`` and ``orga_abbreviation`` columns.
        country_code_list: Country codes; a project is kept when its
            ``country`` string contains any of them.  Empty means no filter.
        orga_code_list: Organization abbreviations to keep.  Empty means no
            filter.

    Returns:
        The filtered frame, with original index labels and row order.
    """
    # FILTER COUNTRY
    # One boolean mask over all codes keeps each project once, even when its
    # country string contains several of the selected codes.
    if country_code_list:
        country_mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            # Codes are literal substrings, not regular expressions.
            country_mask = country_mask | df["country"].str.contains(code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/same_country_filter.py DELETED
@@ -1,16 +0,0 @@
1
- import pandas as pd
2
- from functions.semantic_search import search
3
-
4
def same_country_filter(df, country_code_list):
    """Keep the projects whose ``country`` string contains any given code.

    Args:
        df: Project frame with a ``country`` column.
        country_code_list: Country codes to look for.  When empty, ``df`` is
            returned unchanged.

    Returns:
        The matching rows with their original index labels and row order.
        A project matching several codes appears only once.
    """
    if not country_code_list:
        return df

    # Build one boolean mask instead of concatenating per-code matches, which
    # duplicated every project that matched more than one code.
    country_mask = pd.Series(False, index=df.index)
    for code in country_code_list:
        # Codes are literal substrings, not regular expressions.
        country_mask = country_mask | df["country"].str.contains(code, na=False, regex=False)

    return df[country_mask]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/semantic_search.py DELETED
@@ -1,27 +0,0 @@
1
- import pickle
2
- import faiss
3
- import streamlit as st
4
- from sentence_transformers import SentenceTransformer
5
- import pandas as pd
6
-
7
def search(query, model, embeddings, filtered_df, top_x=20):
    """Return the rows of ``filtered_df`` closest to ``query`` in embedding space.

    The index labels of ``filtered_df`` are used as row positions into
    ``embeddings``.  A flat L2 FAISS index is built over just those rows on
    every call.

    Args:
        query: Free-text search query.
        model: Object with an ``encode`` method that maps a list of strings
            to embedding vectors.
        embeddings: Embedding array indexable by the labels of
            ``filtered_df``.
        filtered_df: Projects to search within.
        top_x: Maximum number of rows to return.

    Returns:
        Up to ``top_x`` rows of ``filtered_df`` in the order FAISS returned
        them; an empty frame when there is nothing to search or ``top_x`` is
        below 1.
    """
    # Never ask FAISS for more neighbours than there are rows: missing
    # results come back as -1, which ``iloc`` would read as "last row".
    k = min(top_x, len(filtered_df))
    if k <= 0:
        return filtered_df.iloc[:0]

    filtered_embeddings = embeddings[filtered_df.index]

    # Build the FAISS index over the filtered embeddings only.
    faiss_index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
    faiss_index.add(filtered_embeddings)

    # Convert query to a (1, dimension) embedding.
    query_embedding = model.encode([query])[0].reshape(1, -1)

    # Perform search; positions refer to rows of ``filtered_embeddings``.
    _distances, neighbours = faiss_index.search(query_embedding, k=k)

    # Defensive: drop any -1 padding FAISS may still return.
    hits = [int(position) for position in neighbours[0] if position >= 0]

    return filtered_df.iloc[hits]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/single_similar.py DELETED
@@ -1,25 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """Return the ``top_x`` projects in ``filtered_df`` most similar to one project.

    The index labels of ``filtered_df`` are used as column positions in
    ``similarity_matrix``.

    Args:
        p_index: Row position of the selected project in
            ``similarity_matrix``.
        similarity_matrix: Pairwise similarity array indexed as
            ``[project, other_project]``.
        filtered_df: Candidate projects to rank.
        top_x: Maximum number of projects to return (previously ignored in
            favour of a hard-coded 10); values below 1 yield an empty frame.

    Returns:
        A new frame with up to ``top_x`` rows of ``filtered_df``, ordered by
        descending similarity, with an added ``similarity`` column.
    """
    # Similarities of the selected project to the filtered projects only.
    filtered_indices = filtered_df.index.tolist()
    project_row = similarity_matrix[p_index, filtered_indices]

    # Clamp so that ``top_x`` of 0 (or more than available) is well defined;
    # a plain ``[-top_x:]`` would return everything for ``top_x == 0``.
    count = max(0, min(top_x, len(project_row)))
    ascending = np.argsort(project_row)
    top_positions = ascending[len(ascending) - count:][::-1]

    # Positions refer to rows of ``filtered_df``; ``assign`` returns a new
    # frame instead of writing into an ``iloc`` slice.
    return filtered_df.iloc[top_positions].assign(
        similarity=project_row[top_positions]
    )
24
-
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/__pycache__/crs_table.cpython-310.pyc DELETED
Binary file (1.21 kB)
 
modules/__pycache__/filter_modules.cpython-310.pyc DELETED
Binary file (997 Bytes)
 
modules/__pycache__/filter_projects.cpython-310.pyc DELETED
Binary file (979 Bytes)
 
modules/__pycache__/navbar.cpython-310.pyc DELETED
Binary file (784 Bytes)
 
modules/__pycache__/result_table.cpython-310.pyc DELETED
Binary file (2.65 kB)
 
modules/__pycache__/sdg_table.cpython-310.pyc DELETED
Binary file (1.19 kB)
 
modules/__pycache__/semantic_search.cpython-310.pyc DELETED
Binary file (1.17 kB)
 
modules/__pycache__/similarity_table.cpython-310.pyc DELETED
Binary file (1.41 kB)
 
modules/multimatch_result_table.py DELETED
@@ -1,134 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
-
4
# CSS that hides the arrow icon of the standard ``st.metric`` delta.
_HIDE_METRIC_ARROW_CSS = (
    "\n"
    "<style>\n"
    '[data-testid="stMetricDelta"] svg {\n'
    "display: none;\n"
    "}\n"
    "</style>\n"
)

# Columns shown for every match, in display order.
_MATCH_TABLE_COLUMNS = [
    "iati_id", "title_main", "orga_abbreviation", "client", "description_main",
    "country_name", "flag", "sdg_list", "crs_3_code_list", "crs_5_code_list",
]


def _with_display_columns(row):
    """Return a copy of a one-row frame with the list and flag columns added."""
    # Copy first so the derived columns are not written into a slice of the
    # caller's frame.
    row = row.copy()

    # TODO: integrate into preprocessing.
    # Transform the ';'-terminated name strings into lists; fall back to an
    # empty string when the value is missing or not a string.
    for level in ("crs_3", "crs_5"):
        try:
            names = row[f"{level}_name"].item().split(";")[:-1]
        except (AttributeError, KeyError, ValueError):
            names = ""
        row[f"{level}_code_list"] = [names]

    row["sdg_list"] = [row['sgd_pred_code'].item()]

    # The first two characters of the country string select the flag icon;
    # "xx" is the placeholder used when no country is available.
    try:
        flag_code = row['country'].item()[:2].lower()
    except (AttributeError, KeyError, TypeError, ValueError):
        flag_code = "xx"
    row["flag"] = f"https://flagicons.lipis.dev/flags/4x3/{flag_code}.svg"

    return row


def show_multi_table(p1_df, p2_df):
    """Render one two-row table per project match.

    ``p1_df`` and ``p2_df`` are aligned row by row: row ``i`` of each frame
    forms one match, and ``p1_df`` carries its ``similarity``.

    Compared with the previous version, a missing CRS name or country on one
    project no longer blanks the same column of the other project, and only
    the expected lookup errors are caught instead of a bare ``except``.

    Args:
        p1_df: First project of every match, with a ``similarity`` column.
        p2_df: Second project of every match.
    """
    st.write("------------------")

    p1_df = p1_df.reset_index(drop=True)
    p2_df = p2_df.reset_index(drop=True)

    # Walk from the last row backwards in steps of two: projects match in
    # both directions and each match should be displayed only once.
    for actual_ind, i in enumerate(range(len(p1_df) - 1, -1, -2), start=1):
        row_from_p1 = _with_display_columns(p1_df.iloc[[i]])
        row_from_p2 = _with_display_columns(p2_df.iloc[[i]])

        match_df = pd.concat([row_from_p1, row_from_p2], ignore_index=True)

        col1, col2 = st.columns([1, 12])
        with col1:
            # remove arrow from standard st.metric()
            st.write(_HIDE_METRIC_ARROW_CSS, unsafe_allow_html=True)

            st.metric(label="Match", value=f"{actual_ind}", delta=f"~ {str(round(row_from_p1['similarity'].item(), 5) * 100)[:4]} %")

        with col2:
            st.write(" ")
            st.dataframe(
                match_df[_MATCH_TABLE_COLUMNS],
                use_container_width=True,
                height=35 + 35 * len(match_df),
                column_config={
                    "iati_id": st.column_config.TextColumn(
                        "IATI ID",
                        help="IATI Project ID",
                        disabled=True,
                        width="small"
                    ),
                    "orga_abbreviation": st.column_config.TextColumn(
                        "Organization",
                        # Previously showed the help text copied from the
                        # description column.
                        help="Organization abbreviation",
                        disabled=True,
                        width="small"
                    ),
                    "client": st.column_config.TextColumn(
                        "Client",
                        help="Client organization of customer",
                        disabled=True,
                        width="small"
                    ),
                    "title_main": st.column_config.TextColumn(
                        "Title",
                        help="If title not in English, title in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "description_main": st.column_config.TextColumn(
                        "Description",
                        help="If description not in English, description in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "country_name": st.column_config.TextColumn(
                        "Country",
                        help="Country of project",
                        disabled=True,
                        width="small"
                    ),
                    "flag": st.column_config.ImageColumn(
                        "Flag",
                        help="country flag",
                        width="small"
                    ),
                    "sdg_list": st.column_config.ListColumn(
                        "SDG Prediction",
                        help="Prediction of SDG's",
                        width="small"
                    ),
                    "crs_3_code_list": st.column_config.ListColumn(
                        "CRS 3",
                        help="CRS 3 code given by organization",
                        width="medium"
                    ),
                    "crs_5_code_list": st.column_config.ListColumn(
                        "CRS 5",
                        help="CRS 5 code given by organization",
                        width="medium"
                    ),
                },
                hide_index=True,
            )

        # NOTE(review): separator placed once per match at loop level; the
        # original nesting could not be recovered -- confirm.
        st.write("------------------")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modules/navbar.py DELETED
@@ -1,39 +0,0 @@
1
- import streamlit as st
2
- import similarity_page
3
-
4
- # giz-dsc colors
5
- # orange: #e5b50d
6
- # green: #48d47b
7
- # blue: #0da2dc
8
- # grey: #dadada
9
-
10
- # giz colors https://www.giz.de/cdc/en/html/59638.html
11
- # red: #c80f0f
12
- # grey: #6f6f6f
13
- # light_grey: #b2b2b2
14
- # light_red: #eba1a3
15
-
16
def show_navbar():
    """Render the two top-level tabs and the matching page inside each."""
    # Enlarge the font size of the tab labels.
    tab_font_css = (
        "\n"
        "<style>\n"
        '.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {\n'
        "font-size:1rem;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(tab_font_css, unsafe_allow_html=True)

    tab_labels = ["🔍 Multi-Project Matching", "🎯 Single-Project Matching"]
    multi_tab, single_tab = st.tabs(tab_labels)

    with multi_tab:
        similarity_page.show_multi_matching_page()

    with single_tab:
        similarity_page.show_single_matching_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt DELETED
@@ -1,10 +0,0 @@
1
- numpy==1.26.4
2
- pandas==2.1.4
3
- streamlit==1.32.2
4
- streamlit-option-menu==0.3.12
5
- scipy==1.12.0
6
- faiss-cpu==1.8.0
7
- faiss-gpu==1.7.2
8
- sentence-transformers==2.5.1
9
- streamlit-aggrid==0.3.4
10
- psutil==5.9.0
 
 
 
 
 
 
 
 
 
 
 
similarity_page.py CHANGED
@@ -111,7 +111,6 @@ def load_model():
111
  return model
112
 
113
  # Load Embeddings
114
- """
115
  @st.cache_data
116
  def load_embeddings_and_index():
117
  # Load embeddings
@@ -120,7 +119,7 @@ def load_embeddings_and_index():
120
  embeddings = stored_data["embeddings"]
121
 
122
  return embeddings
123
- """
124
 
125
  # USE CACHE FUNCTIONS
126
  sim_matrix = load_sim_matrix()
@@ -135,7 +134,7 @@ COUNTRY_OPTION_LIST = getCountry()
135
 
136
  # LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
137
  model = load_model()
138
- #embeddings = load_embeddings_and_index()
139
 
140
  def show_multi_matching_page():
141
  #st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
@@ -199,7 +198,7 @@ def show_multi_matching_page():
199
  )
200
 
201
  # SEARCH BOX
202
- #query = st.text_input("Search Query")
203
 
204
  with col3:
205
  # COUNTRY SELECTION
@@ -243,11 +242,7 @@ def show_multi_matching_page():
243
 
244
  # FILTER DF WITH SELECTED FILTER OPTIONS
245
  TOP_X_PROJECTS = 30
246
- filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
247
- #query,
248
- model,
249
- #embeddings,
250
- TOP_X_PROJECTS)
251
  if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
252
  # FIND MATCHES
253
  ## If only same country checkbox i sactivated
@@ -317,7 +312,6 @@ def show_single_matching_page():
317
  else:
318
  search_list = title_search_list
319
 
320
-
321
  project_option = st.selectbox(
322
  label = 'Search for a project',
323
  index = None,
 
111
  return model
112
 
113
  # Load Embeddings
 
114
  @st.cache_data
115
  def load_embeddings_and_index():
116
  # Load embeddings
 
119
  embeddings = stored_data["embeddings"]
120
 
121
  return embeddings
122
+
123
 
124
  # USE CACHE FUNCTIONS
125
  sim_matrix = load_sim_matrix()
 
134
 
135
  # LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
136
  model = load_model()
137
+ embeddings = load_embeddings_and_index()
138
 
139
  def show_multi_matching_page():
140
  #st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
 
198
  )
199
 
200
  # SEARCH BOX
201
+ query = st.text_input("Search Query")
202
 
203
  with col3:
204
  # COUNTRY SELECTION
 
242
 
243
  # FILTER DF WITH SELECTED FILTER OPTIONS
244
  TOP_X_PROJECTS = 30
245
+ filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS)
 
 
 
 
246
  if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
247
  # FIND MATCHES
248
  ## If only same country checkbox i sactivated
 
312
  else:
313
  search_list = title_search_list
314
 
 
315
  project_option = st.selectbox(
316
  label = 'Search for a project',
317
  index = None,
src/codelists/country_codes_ISO3166-1alpha-2.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ff1ad92034a4a593138fcbb7570ec5015c3c28a4476f95015a39d0bf257382a
3
- size 13113
 
 
 
 
src/codelists/crs3_codes.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfd7bf86baf7bbc54c880c098b89b803adfb060c2c9ba55ee976cc47c2be426a
3
- size 3218
 
 
 
 
src/codelists/crs5_codes.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:84a522ad573ad1866835cb24efc7984016ef17b9990ac2484345705ac82a0d80
3
- size 100133
 
 
 
 
src/codelists/flags/AC.png DELETED
Binary file (182 kB)
 
src/codelists/flags/AD.png DELETED
Binary file (56.2 kB)
 
src/codelists/flags/AE.png DELETED
Binary file (973 Bytes)
 
src/codelists/flags/AF.png DELETED
Binary file (107 kB)
 
src/codelists/flags/AG.png DELETED
Binary file (16 kB)
 
src/codelists/flags/AI-alt.png DELETED
Binary file (14.9 kB)
 
src/codelists/flags/AI.png DELETED
Binary file (18.6 kB)
 
src/codelists/flags/AL.png DELETED
Binary file (34.8 kB)
 
src/codelists/flags/AM.png DELETED
Binary file (717 Bytes)
 
src/codelists/flags/AO.png DELETED
Binary file (20.5 kB)
 
src/codelists/flags/AQ.png DELETED
Binary file (13.8 kB)
 
src/codelists/flags/AR.png DELETED
Binary file (46.9 kB)
 
src/codelists/flags/AS.png DELETED
Binary file (101 kB)
 
src/codelists/flags/AT.png DELETED
Binary file (292 Bytes)