Jan Mühlnikel committed on
Commit
c8e0175
1 Parent(s): 21b6daa

added single matching result table

Browse files
__pycache__/similarity_page.cpython-310.pyc CHANGED
Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ
 
functions/__pycache__/calc_matches.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ
 
functions/__pycache__/single_similar.cpython-310.pyc ADDED
Binary file (675 Bytes). View file
 
functions/single_similar.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
def find_similar(p_index, similarity_matrix, projects_df, top_x):
    """Return the ``top_x`` projects most similar to the one at ``p_index``.

    Args:
        p_index: Index of the selected project; used to pick one row out of
            ``similarity_matrix``.
        similarity_matrix: Object indexable by ``p_index``. The selected row
            is assumed to be a 1-D NumPy array of scores -- TODO confirm
            against the caller (a sparse row would need densifying first).
        projects_df: DataFrame whose rows are looked up by position
            (``iloc``) using the positions of the highest scores in the row.
        top_x: Maximum number of rows to return. Values <= 0 give an empty
            result; values larger than the row length return every row.

    Returns:
        A new DataFrame holding the selected rows of ``projects_df`` ordered
        by descending score, with an added ``"similarity"`` column. The
        selected project itself is not excluded. ``projects_df`` is left
        unmodified.
    """
    # Clamp so that 0 / negative requests do not turn into "everything"
    # (a ``[-0:]`` slice would select the whole array).
    limit = max(top_x, 0)

    scores = similarity_matrix[p_index]

    # Ascending argsort, reversed -> best match first, then keep the head.
    ranked = np.argsort(scores)[::-1][:limit]

    # ``assign`` builds a fresh frame, so no column is written onto a slice
    # of ``projects_df`` (avoids pandas' SettingWithCopy warning).
    return projects_df.iloc[ranked].assign(similarity=scores[ranked])
14
+
15
+
modules/{result_table.py → multimatch_result_table.py} RENAMED
@@ -1,8 +1,7 @@
1
  import streamlit as st
2
- from st_aggrid import AgGrid, GridOptionsBuilder
3
  import pandas as pd
4
 
5
- def show_table(p1_df, p2_df):
6
  st.write("------------------")
7
 
8
  p1_df = p1_df.reset_index(drop=True)
 
1
  import streamlit as st
 
2
  import pandas as pd
3
 
4
+ def show_multi_table(p1_df, p2_df):
5
  st.write("------------------")
6
 
7
  p1_df = p1_df.reset_index(drop=True)
modules/singlematch_result_table.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
def _split_codes(raw):
    """Split a ';'-separated code string, dropping the piece after the last ';'."""
    # Missing values arrive as NaN/None rather than str; treat them as
    # "no codes" instead of trying to slice a float.
    if not isinstance(raw, str):
        return []
    return raw.split(";")[:-1]


def show_single_table(result_df):
    """Render the single-project matching result as a Streamlit dataframe.

    Args:
        result_df: DataFrame that provides the columns ``similarity``,
            ``iati_id``, ``title_main``, ``orga_abbreviation``, ``client``,
            ``description_main``, ``country``, ``crs_3_code``,
            ``crs_5_code`` and ``sgd_pred_code``.

    The frame is re-indexed into a new object before the derived columns
    (``crs_3_code_list``, ``crs_5_code_list``, ``sdg_list``, ``flag``) are
    added, so the caller's frame is not modified. Nothing is returned; the
    table is written to the page via ``st.dataframe``.
    """
    result_df = result_df.reset_index(drop=True)

    # Transformations
    result_df["crs_3_code_list"] = result_df["crs_3_code"].apply(_split_codes)
    result_df["crs_5_code_list"] = result_df["crs_5_code"].apply(_split_codes)
    result_df["sdg_list"] = result_df["sgd_pred_code"].apply(
        lambda x: [x] if pd.notna(x) else []
    )
    # The first two characters of the country value select the flag image;
    # missing countries fall back to the "xx" placeholder flag.
    result_df["flag"] = result_df["country"].apply(
        lambda x: f"https://flagicons.lipis.dev/flags/4x3/{x[:2].lower()}.svg"
        if pd.notna(x)
        else "https://flagicons.lipis.dev/flags/4x3/xx.svg"
    )

    shown_columns = [
        "similarity",
        "iati_id",
        "title_main",
        "orga_abbreviation",
        "client",
        "description_main",
        "country",
        "flag",
        "sdg_list",
        "crs_3_code_list",
        "crs_5_code_list",
    ]

    st.dataframe(
        result_df[shown_columns],
        use_container_width=True,
        # One 35px row per record plus 35px for the header row.
        height=35 + 35 * len(result_df),
        column_config={
            "similarity": st.column_config.TextColumn(
                "Similarity",
                help="similarity to selected project",
                disabled=True,
                width="small",
            ),
            "iati_id": st.column_config.TextColumn(
                "IATI ID",
                help="IATI Project ID",
                disabled=True,
                width="small",
            ),
            "orga_abbreviation": st.column_config.TextColumn(
                "Organization",
                # Previously carried the description column's help text.
                help="Organization abbreviation",
                disabled=True,
                width="small",
            ),
            "client": st.column_config.TextColumn(
                "Client",
                help="Client organization of customer",
                disabled=True,
                width="small",
            ),
            "title_main": st.column_config.TextColumn(
                "Title",
                help="If title not in English, title in other language provided",
                disabled=True,
                width="large",
            ),
            "description_main": st.column_config.TextColumn(
                "Description",
                help="If description not in English, description in other language provided",
                disabled=True,
                width="large",
            ),
            "country": st.column_config.TextColumn(
                "Country",
                help="Country of project",
                disabled=True,
                width="small",
            ),
            "flag": st.column_config.ImageColumn(
                "Flag",
                help="country flag",
                width="small",
            ),
            "sdg_list": st.column_config.ListColumn(
                "SDG Prediction",
                help="Prediction of SDG's",
                width="small",
            ),
            "crs_3_code_list": st.column_config.ListColumn(
                "CRS 3",
                help="CRS 3 code given by organization",
                width="small",
            ),
            "crs_5_code_list": st.column_config.ListColumn(
                "CRS 5",
                help="CRS 5 code given by organization",
                width="small",
            ),
        },
        hide_index=True,
    )
similarity_page.py CHANGED
@@ -10,10 +10,12 @@ import pandas as pd
10
  from scipy.sparse import load_npz
11
  import pickle
12
  from sentence_transformers import SentenceTransformer
13
- from modules.result_table import show_table
 
14
  from functions.filter_projects import filter_projects
15
  from functions.calc_matches import calc_matches
16
  from functions.same_country_filter import same_country_filter
 
17
  import psutil
18
  import os
19
  import gc
@@ -235,7 +237,7 @@ def show_multi_matching_page():
235
  p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
236
 
237
  # SHOW THE RESULT
238
- show_table(p1_df, p2_df)
239
  del p1_df, p2_df
240
  else:
241
  st.write("Select at least on CRS 3, SDG or type in a query")
@@ -265,4 +267,12 @@ def show_single_matching_page():
265
  placeholder = " ",
266
  options = search_list,
267
  )
 
 
 
 
 
 
 
 
268
 
 
10
  from scipy.sparse import load_npz
11
  import pickle
12
  from sentence_transformers import SentenceTransformer
13
+ from modules.multimatch_result_table import show_multi_table
14
+ from modules.singlematch_result_table import show_single_table
15
  from functions.filter_projects import filter_projects
16
  from functions.calc_matches import calc_matches
17
  from functions.same_country_filter import same_country_filter
18
+ from functions.single_similar import find_similar
19
  import psutil
20
  import os
21
  import gc
 
237
  p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
238
 
239
  # SHOW THE RESULT
240
+ show_multi_table(p1_df, p2_df)
241
  del p1_df, p2_df
242
  else:
243
  st.write("Select at least on CRS 3, SDG or type in a query")
 
267
  placeholder = " ",
268
  options = search_list,
269
  )
270
+
271
+ if project_option:
272
+
273
+ selected_index = search_list.index(project_option)
274
+
275
+ top_projects_df = find_similar(selected_index, sim_matrix, projects_df, 10)
276
+
277
+ show_single_table(top_projects_df)
278