Jan Mühlnikel
committed on
Commit
·
f3a1940
1
Parent(s):
39b49f4
test
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +36 -0
- .streamlit/config.toml +14 -0
- README.md +13 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/crs.cpython-310.pyc +0 -0
- __pycache__/home.cpython-310.pyc +0 -0
- __pycache__/sector.cpython-310.pyc +0 -0
- __pycache__/similarity.cpython-310.pyc +0 -0
- __pycache__/similarity_page.cpython-310.pyc +0 -0
- functions/__pycache__/calc_matches.cpython-310.pyc +0 -0
- functions/__pycache__/filter_projects.cpython-310.pyc +0 -0
- functions/__pycache__/semantic_search.cpython-310.pyc +0 -0
- functions/__pycache__/single_similar.cpython-310.pyc +0 -0
- functions/calc_matches.py +37 -0
- functions/different_orga_filter.py +12 -0
- functions/filter_single.py +22 -0
- functions/same_country_filter.py +16 -0
- functions/semantic_search.py +27 -0
- functions/single_similar.py +25 -0
- modules/__pycache__/crs_table.cpython-310.pyc +0 -0
- modules/__pycache__/filter_modules.cpython-310.pyc +0 -0
- modules/__pycache__/filter_projects.cpython-310.pyc +0 -0
- modules/__pycache__/navbar.cpython-310.pyc +0 -0
- modules/__pycache__/result_table.cpython-310.pyc +0 -0
- modules/__pycache__/sdg_table.cpython-310.pyc +0 -0
- modules/__pycache__/semantic_search.cpython-310.pyc +0 -0
- modules/__pycache__/similarity_table.cpython-310.pyc +0 -0
- modules/multimatch_result_table.py +134 -0
- requirements.txt +2 -1
- similarity_page.py +1 -3
- src/codelists/country_codes_ISO3166-1alpha-2.csv +3 -0
- src/codelists/crs3_codes.csv +3 -0
- src/codelists/crs5_codes.csv +3 -0
- src/codelists/flags/AC.png +0 -0
- src/codelists/flags/AD.png +0 -0
- src/codelists/flags/AE.png +0 -0
- src/codelists/flags/AF.png +0 -0
- src/codelists/flags/AG.png +0 -0
- src/codelists/flags/AI-alt.png +0 -0
- src/codelists/flags/AI.png +0 -0
- src/codelists/flags/AL.png +0 -0
- src/codelists/flags/AM.png +0 -0
- src/codelists/flags/AO.png +0 -0
- src/codelists/flags/AQ.png +0 -0
- src/codelists/flags/AR.png +0 -0
- src/codelists/flags/AS.png +0 -0
- src/codelists/flags/AT.png +0 -0
- src/codelists/flags/AU.png +0 -0
- src/codelists/flags/AW.png +0 -0
- src/codelists/flags/AX.png +0 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[global]
|
2 |
+
|
3 |
+
[server]
|
4 |
+
headless = true
|
5 |
+
|
6 |
+
[client]
|
7 |
+
initialSidebarState = "expanded"
|
8 |
+
|
9 |
+
[theme]
|
10 |
+
primaryColor="#c30f08"
|
11 |
+
backgroundColor="#ffffff"
|
12 |
+
secondaryBackgroundColor="#eceded"
|
13 |
+
textColor="#000000"
|
14 |
+
font="sans serif"
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Development Banks Collaboration Analyzer
|
3 |
+
emoji: 🐢
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: red
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.32.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: true
|
10 |
+
license: mit
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
__pycache__/app.cpython-310.pyc
ADDED
Binary file (664 Bytes). View file
|
|
__pycache__/crs.cpython-310.pyc
ADDED
Binary file (3.71 kB). View file
|
|
__pycache__/home.cpython-310.pyc
ADDED
Binary file (447 Bytes). View file
|
|
__pycache__/sector.cpython-310.pyc
ADDED
Binary file (6.06 kB). View file
|
|
__pycache__/similarity.cpython-310.pyc
ADDED
Binary file (3.66 kB). View file
|
|
__pycache__/similarity_page.cpython-310.pyc
ADDED
Binary file (9.48 kB). View file
|
|
functions/__pycache__/calc_matches.cpython-310.pyc
ADDED
Binary file (922 Bytes). View file
|
|
functions/__pycache__/filter_projects.cpython-310.pyc
ADDED
Binary file (1.81 kB). View file
|
|
functions/__pycache__/semantic_search.cpython-310.pyc
ADDED
Binary file (1.07 kB). View file
|
|
functions/__pycache__/single_similar.cpython-310.pyc
ADDED
Binary file (672 Bytes). View file
|
|
functions/calc_matches.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
    """Return the ``top_x`` most similar (project 1, project 2) pairs.

    Rows of the match matrix are the projects of ``filtered_df`` (project 1)
    and its columns are the projects of ``project_df`` (project 2). The index
    labels of both frames are used as integer positions into
    ``similarity_matrix``.

    Args:
        filtered_df: DataFrame with the candidate "project 1" rows.
        project_df: DataFrame with the candidate "project 2" rows; this may
            be any project.
        similarity_matrix: 2-D NumPy array of pairwise similarities.
            NOTE: its diagonal is set to 0 *in place* so that a project never
            matches itself; the caller's array is modified.
        top_x: Maximum number of pairs to return. Values larger than the
            number of available pairs are clamped; values <= 0 yield empty
            results.

    Returns:
        A tuple ``(p1_df, p2_df)`` of equally long DataFrames. Row ``i`` of
        ``p1_df`` and row ``i`` of ``p2_df`` form one match, both carry the
        pair's value in a ``"similarity"`` column, and rows are ordered by
        ascending similarity (best match last).
    """
    row_positions = np.asarray(filtered_df.index, dtype=np.intp)
    col_positions = np.asarray(project_df.index, dtype=np.intp)

    # A project must not be reported as its own best match.
    np.fill_diagonal(similarity_matrix, 0)
    match_matrix = similarity_matrix[row_positions, :][:, col_positions]

    top_x = max(0, min(int(top_x), match_matrix.size))
    if top_x == 0:
        # ``order[-0:]`` would select *every* pair, so handle this explicitly.
        p1_df = filtered_df.iloc[[]].copy()
        p2_df = project_df.iloc[[]].copy()
        no_values = np.empty(0, dtype=match_matrix.dtype)
        p1_df["similarity"] = no_values
        p2_df["similarity"] = no_values
        return p1_df, p2_df

    # Flat positions of all pairs, sorted by ascending similarity.
    order = np.argsort(match_matrix, axis=None)

    # Row (project 1) and column (project 2) of the highest similarities.
    top_indices = np.unravel_index(order[-top_x:], match_matrix.shape)
    top_values = match_matrix[top_indices]

    # Copy before adding a column so the inputs are never written through.
    p1_df = filtered_df.iloc[top_indices[0]].copy()
    p1_df["similarity"] = top_values
    p2_df = project_df.iloc[top_indices[1]].copy()
    p2_df["similarity"] = top_values

    return p1_df, p2_df
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
functions/different_orga_filter.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def different_orga_filter(df, orga):
    """Keep only the projects that do not belong to ``orga``.

    NOTE(review): the previous body was a copy of the country filter that
    referenced an undefined ``country_code_list`` and never used ``orga``, so
    every call raised ``NameError``. The behaviour below is inferred from the
    function name and signature - confirm against the intended caller.

    Args:
        df: Project DataFrame with an ``"orga_abbreviation"`` column.
        orga: Organization abbreviation to exclude, or a collection of
            abbreviations. ``None`` excludes nothing.

    Returns:
        The rows of ``df`` whose ``"orga_abbreviation"`` is not one of the
        excluded organizations, in their original order and with their
        original index.
    """
    if orga is None:
        return df

    excluded = [orga] if isinstance(orga, str) else list(orga)
    return df[~df["orga_abbreviation"].isin(excluded)]
|
functions/filter_single.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from functions.semantic_search import search
|
3 |
+
|
4 |
+
def contains_code(crs_codes, code_list):
    """Tell whether any ';'-separated entry of ``crs_codes`` is in ``code_list``.

    ``crs_codes`` is converted with ``str()`` first, so non-string values
    (e.g. ``None``) are compared by their string representation.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False
|
7 |
+
|
8 |
+
def filter_single(df, country_code_list, orga_code_list):
    """Filter projects by country codes and by organization.

    Args:
        df: Project DataFrame with a string column ``"country"`` and a column
            ``"orga_abbreviation"``.
        country_code_list: Country codes to keep. A row is kept when its
            ``"country"`` value contains at least one of the codes (literal
            substring match; missing values never match). An empty list
            disables the country filter.
        orga_code_list: Organization abbreviations to keep. An empty list
            disables the organization filter.

    Returns:
        The matching rows of ``df`` in their original order and with their
        original index. Each row appears at most once, even if its
        ``"country"`` value contains several of the requested codes.
    """
    # FILTER COUNTRY
    if country_code_list:
        # OR the per-code matches into one mask. Concatenating one frame per
        # code would repeat rows that list several of the selected countries.
        country_mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            country_mask = country_mask | df["country"].str.contains(
                code, na=False, regex=False
            )
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df["orga_abbreviation"].isin(orga_code_list)]

    return df
|
functions/same_country_filter.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from functions.semantic_search import search
|
3 |
+
|
4 |
+
def same_country_filter(df, country_code_list):
    """Keep only the projects located in one of the given countries.

    Args:
        df: Project DataFrame with a string column ``"country"``.
        country_code_list: Country codes to keep. A row is kept when its
            ``"country"`` value contains at least one of the codes (literal
            substring match; missing values never match).

    Returns:
        ``df`` itself when ``country_code_list`` is empty; otherwise the
        matching rows in their original order and with their original index.
        Each row appears at most once, even if its ``"country"`` value
        contains several of the requested codes.
    """
    if not country_code_list:
        return df

    # OR the per-code matches into one mask. Concatenating one frame per code
    # would repeat rows that list several of the selected countries.
    country_mask = pd.Series(False, index=df.index)
    for code in country_code_list:
        country_mask = country_mask | df["country"].str.contains(
            code, na=False, regex=False
        )

    return df[country_mask]
|
functions/semantic_search.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import faiss
|
3 |
+
import streamlit as st
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
def search(query, model, embeddings, filtered_df, top_x=20):
    """Return the rows of ``filtered_df`` that are closest to ``query``.

    The embeddings of the filtered projects are put into a fresh exact L2
    FAISS index, the query is embedded with ``model`` and the nearest
    neighbours are looked up.

    Args:
        query: Search text.
        model: Object with an ``encode(list_of_texts)`` method that returns
            one embedding per text (presumably a ``SentenceTransformer`` -
            confirm against the caller).
        embeddings: 2-D array of project embeddings; the index labels of
            ``filtered_df`` are used as row positions into it.
        filtered_df: DataFrame with the candidate projects.
        top_x: Maximum number of results.

    Returns:
        Up to ``top_x`` rows of ``filtered_df``, ordered as returned by the
        FAISS search (nearest first). Empty when there are no candidates or
        ``top_x`` is not positive.
    """
    # FAISS pads missing neighbours with -1 when k exceeds the index size;
    # ``iloc[-1]`` would then silently return the last row, so clamp k.
    k = min(int(top_x), len(filtered_df))
    if k <= 0:
        return filtered_df.iloc[[]]

    filtered_embeddings = embeddings[filtered_df.index]

    # Build an exact L2 index over the candidate embeddings only.
    dimension = filtered_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(filtered_embeddings)

    # Convert query to a (1, dimension) embedding.
    query_embedding = model.encode([query])[0].reshape(1, -1)

    # Perform search; positions refer to rows of ``filtered_embeddings``.
    _distances, positions = faiss_index.search(query_embedding, k=k)

    # Defensive: drop any -1 padding that might still be present.
    top_positions = [int(pos) for pos in positions[0] if pos >= 0]

    return filtered_df.iloc[top_positions]
|
functions/single_similar.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """Return the ``top_x`` projects of ``filtered_df`` most similar to one project.

    Args:
        p_index: Row position of the selected project in
            ``similarity_matrix``.
        similarity_matrix: 2-D NumPy array of pairwise similarities.
        filtered_df: DataFrame with the candidate projects; its index labels
            are used as column positions into ``similarity_matrix``.
        top_x: Maximum number of similar projects to return. Values larger
            than the number of candidates are clamped; values <= 0 yield an
            empty result.

    Returns:
        A copy of the selected rows of ``filtered_df``, ordered by descending
        similarity, with the value in an added ``"similarity"`` column.
    """
    # Candidate columns: only the projects contained in the filtered frame.
    filtered_indices = filtered_df.index.tolist()

    k = max(0, min(int(top_x), len(filtered_indices)))
    if k == 0:
        # ``sorted[-0:]`` would select every candidate, so handle it here.
        result_df = filtered_df.iloc[[]].copy()
        result_df["similarity"] = np.empty(0, dtype=float)
        return result_df

    # Similarities of the selected project to every candidate. Indexing the
    # row directly avoids copying the whole column-filtered matrix.
    project_row = similarity_matrix[p_index, filtered_indices]

    # Positions (within filtered_df) of the k largest values, best first.
    top_positions = np.argsort(project_row)[-k:][::-1]

    result_df = filtered_df.iloc[top_positions].copy()
    result_df["similarity"] = project_row[top_positions]

    return result_df
|
24 |
+
|
25 |
+
|
modules/__pycache__/crs_table.cpython-310.pyc
ADDED
Binary file (1.21 kB). View file
|
|
modules/__pycache__/filter_modules.cpython-310.pyc
ADDED
Binary file (997 Bytes). View file
|
|
modules/__pycache__/filter_projects.cpython-310.pyc
ADDED
Binary file (979 Bytes). View file
|
|
modules/__pycache__/navbar.cpython-310.pyc
ADDED
Binary file (784 Bytes). View file
|
|
modules/__pycache__/result_table.cpython-310.pyc
ADDED
Binary file (2.65 kB). View file
|
|
modules/__pycache__/sdg_table.cpython-310.pyc
ADDED
Binary file (1.19 kB). View file
|
|
modules/__pycache__/semantic_search.cpython-310.pyc
ADDED
Binary file (1.17 kB). View file
|
|
modules/__pycache__/similarity_table.cpython-310.pyc
ADDED
Binary file (1.41 kB). View file
|
|
modules/multimatch_result_table.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
_FLAG_URL_TEMPLATE = "https://flagicons.lipis.dev/flags/4x3/{code}.svg"
_UNKNOWN_FLAG_URL = "https://flagicons.lipis.dev/flags/4x3/xx.svg"


def _split_codes(row, column):
    """Return the ';'-separated entries of ``row[column]`` or "" if unavailable.

    ``row`` is a one-row DataFrame. The last fragment of the split is dropped
    (presumably the values end with a trailing ';' - TODO confirm against the
    preprocessing).
    """
    try:
        return row[column].item().split(";")[:-1]
    except (KeyError, AttributeError, ValueError):
        # Column missing, value not a string (e.g. NaN), or not exactly one row.
        return ""


def _flag_url(row):
    """Return the flag image URL for the first two letters of ``row['country']``."""
    try:
        country_code = row["country"].item()[:2].lower()
    except (KeyError, TypeError, AttributeError, ValueError):
        # Column missing or value not a string (e.g. NaN): use placeholder flag.
        return _UNKNOWN_FLAG_URL
    return _FLAG_URL_TEMPLATE.format(code=country_code)


def show_multi_table(p1_df, p2_df):
    """Render the matched project pairs as a list of two-row tables.

    Row ``i`` of ``p1_df`` and row ``i`` of ``p2_df`` form one match. The
    frames are walked from the last row to the first in steps of two, because
    every match is contained in both directions and should be displayed only
    once. For each displayed match a metric with the running match number and
    the similarity is shown next to a table with both projects.

    Args:
        p1_df: DataFrame with the first project of every match. Must contain
            the displayed columns, ``"similarity"``, ``"sgd_pred_code"`` and
            should contain ``"crs_3_name"``, ``"crs_5_name"``, ``"country"``.
        p2_df: DataFrame with the second project of every match, same
            columns and length as ``p1_df``.

    Returns:
        None. The function only writes Streamlit elements.
    """
    st.write("------------------")

    p1_df = p1_df.reset_index(drop=True)
    p2_df = p2_df.reset_index(drop=True)

    # Step size 2: a match exists in both directions, show it only once.
    positions = range(len(p1_df) - 1, -1, -2)
    for match_number, i in enumerate(positions, start=1):
        # Copies, so the helper columns below are not written into the inputs.
        row_from_p1 = p1_df.iloc[[i]].copy()
        row_from_p2 = p2_df.iloc[[i]].copy()

        # INTEGRATE IN PREPROCESSING !!!
        # Derive the display columns per project, so that a missing value in
        # one project does not discard the valid value of the other one.
        for row in (row_from_p1, row_from_p2):
            # transform strings to list
            row["crs_3_code_list"] = [_split_codes(row, "crs_3_name")]
            row["crs_5_code_list"] = [_split_codes(row, "crs_5_name")]
            row["sdg_list"] = [row["sgd_pred_code"].item()]
            row["flag"] = _flag_url(row)

        match_df = pd.concat([row_from_p1, row_from_p2], ignore_index=True)
        similarity = row_from_p1["similarity"].item()

        col1, col2 = st.columns([1, 12])
        with col1:

            # remove arrow from standard st.metric()
            st.write(
                """
                <style>
                [data-testid="stMetricDelta"] svg {
                    display: none;
                }
                </style>
                """,
                unsafe_allow_html=True,
            )

            # Format as a percentage with one decimal instead of truncating
            # the string representation (which produced e.g. "100.").
            st.metric(
                label="Match",
                value=f"{match_number}",
                delta=f"~ {similarity * 100:.1f} %",
            )

        with col2:
            st.write(" ")
            st.dataframe(
                match_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
                use_container_width=True,
                height=35 + 35 * len(match_df),
                column_config={
                    "iati_id": st.column_config.TextColumn(
                        "IATI ID",
                        help="IATI Project ID",
                        disabled=True,
                        width="small"
                    ),
                    "orga_abbreviation": st.column_config.TextColumn(
                        "Organization",
                        help="Abbreviation of the organization",
                        disabled=True,
                        width="small"
                    ),
                    "client": st.column_config.TextColumn(
                        "Client",
                        help="Client organization of customer",
                        disabled=True,
                        width="small"
                    ),
                    "title_main": st.column_config.TextColumn(
                        "Title",
                        help="If title not in English, title in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "description_main": st.column_config.TextColumn(
                        "Description",
                        help="If description not in English, description in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "country_name": st.column_config.TextColumn(
                        "Country",
                        help="Country of project",
                        disabled=True,
                        width="small"
                    ),
                    "flag": st.column_config.ImageColumn(
                        "Flag",
                        help="country flag",
                        width="small"
                    ),
                    "sdg_list": st.column_config.ListColumn(
                        "SDG Prediction",
                        help="Prediction of SDG's",
                        width="small"
                    ),
                    "crs_3_code_list": st.column_config.ListColumn(
                        "CRS 3",
                        help="CRS 3 code given by organization",
                        width="medium"
                    ),
                    "crs_5_code_list": st.column_config.ListColumn(
                        "CRS 5",
                        help="CRS 5 code given by organization",
                        width="medium"
                    ),
                },
                hide_index=True,
            )

        st.write("------------------")
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ scipy==1.12.0
|
|
6 |
faiss-cpu==1.8.0
|
7 |
faiss-gpu==1.7.2
|
8 |
sentence-transformers==2.5.1
|
9 |
-
streamlit-aggrid==0.3.4.
|
|
|
|
6 |
faiss-cpu==1.8.0
|
7 |
faiss-gpu==1.7.2
|
8 |
sentence-transformers==2.5.1
|
9 |
+
streamlit-aggrid==0.3.4.
|
10 |
+
psutil==5.9.0
|
similarity_page.py
CHANGED
@@ -17,15 +17,13 @@ from functions.filter_single import filter_single
|
|
17 |
from functions.calc_matches import calc_matches
|
18 |
from functions.same_country_filter import same_country_filter
|
19 |
from functions.single_similar import find_similar
|
20 |
-
|
21 |
import os
|
22 |
import gc
|
23 |
|
24 |
-
"""
|
25 |
def get_process_memory():
|
26 |
process = psutil.Process(os.getpid())
|
27 |
return process.memory_info().rss / (1024 * 1024)
|
28 |
-
"""
|
29 |
|
30 |
# Catch DATA
|
31 |
# Load Similarity matrix
|
|
|
17 |
from functions.calc_matches import calc_matches
|
18 |
from functions.same_country_filter import same_country_filter
|
19 |
from functions.single_similar import find_similar
|
20 |
+
import psutil
|
21 |
import os
|
22 |
import gc
|
23 |
|
|
|
24 |
def get_process_memory():
    """Return the resident set size (RSS) of the current process in MiB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024 * 1024)
|
|
|
27 |
|
28 |
# Catch DATA
|
29 |
# Load Similarity matrix
|
src/codelists/country_codes_ISO3166-1alpha-2.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ff1ad92034a4a593138fcbb7570ec5015c3c28a4476f95015a39d0bf257382a
|
3 |
+
size 13113
|
src/codelists/crs3_codes.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfd7bf86baf7bbc54c880c098b89b803adfb060c2c9ba55ee976cc47c2be426a
|
3 |
+
size 3218
|
src/codelists/crs5_codes.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84a522ad573ad1866835cb24efc7984016ef17b9990ac2484345705ac82a0d80
|
3 |
+
size 100133
|
src/codelists/flags/AC.png
ADDED
src/codelists/flags/AD.png
ADDED
src/codelists/flags/AE.png
ADDED
src/codelists/flags/AF.png
ADDED
src/codelists/flags/AG.png
ADDED
src/codelists/flags/AI-alt.png
ADDED
src/codelists/flags/AI.png
ADDED
src/codelists/flags/AL.png
ADDED
src/codelists/flags/AM.png
ADDED
src/codelists/flags/AO.png
ADDED
src/codelists/flags/AQ.png
ADDED
src/codelists/flags/AR.png
ADDED
src/codelists/flags/AS.png
ADDED
src/codelists/flags/AT.png
ADDED
src/codelists/flags/AU.png
ADDED
src/codelists/flags/AW.png
ADDED
src/codelists/flags/AX.png
ADDED