Jan Mühlnikel committed on
Commit
00341f5
1 Parent(s): 2080e6b

enhanced documentation

Browse files
functions/different_orga_filter.py DELETED
@@ -1,12 +0,0 @@
1
- import pandas as pd
2
-
3
- def different_orga_filter(df, orga):
4
- # FILTER COUNTRY
5
- country_filtered_df = pd.DataFrame()
6
- for c in country_code_list:
7
- c_df = df[df["country"].str.contains(c, na=False)]
8
- country_filtered_df = pd.concat([country_filtered_df, c_df], ignore_index=False)
9
-
10
- df = country_filtered_df
11
-
12
- return country_filtered_df
 
 
 
 
 
 
 
 
 
 
 
 
 
functions/semantic_search.py CHANGED
@@ -1,9 +1,8 @@
1
- import pickle
2
  import faiss
3
- import streamlit as st
4
- from sentence_transformers import SentenceTransformer
5
- import pandas as pd
6
 
 
 
 
7
  def search(query, model, embeddings, filtered_df, top_x=20):
8
 
9
  filtered_df_indecies_list = filtered_df.index
@@ -21,7 +20,6 @@ def search(query, model, embeddings, filtered_df, top_x=20):
21
  D, I = faiss_index.search(query_embedding, k=top_x) # Search for top x similar items
22
 
23
  # Extract the sentences corresponding to the top indices
24
- #print(filtered_df.columns())
25
  top_indecies = [i for i in I[0]]
26
 
27
  return filtered_df.iloc[top_indecies]
 
 
1
  import faiss
 
 
 
2
 
3
+ """
4
+ Semantic Search Function
5
+ """
6
  def search(query, model, embeddings, filtered_df, top_x=20):
7
 
8
  filtered_df_indecies_list = filtered_df.index
 
20
  D, I = faiss_index.search(query_embedding, k=top_x) # Search for top x similar items
21
 
22
  # Extract the sentences corresponding to the top indices
 
23
  top_indecies = [i for i in I[0]]
24
 
25
  return filtered_df.iloc[top_indecies]
modules/singlematch_result_table.py CHANGED
@@ -1,8 +1,17 @@
1
  import streamlit as st
2
- import pandas as pd
 
 
 
 
 
3
 
4
  def show_single_table(selected_project_index, projects_df, result_df):
 
 
 
5
 
 
6
  result_df['crs_3_code_list'] = result_df['crs_3_name'].apply(
7
  lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";")[:-1])
8
  )
@@ -25,6 +34,7 @@ def show_single_table(selected_project_index, projects_df, result_df):
25
  lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
26
  )
27
 
 
28
  st.subheader("Reference Project")
29
  st.dataframe(
30
  sel_p_row[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "country_flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
@@ -92,6 +102,7 @@ def show_single_table(selected_project_index, projects_df, result_df):
92
  )
93
 
94
 
 
95
  if len(result_df) == 0:
96
  st.write("No results found!")
97
  else:
@@ -112,12 +123,6 @@ def show_single_table(selected_project_index, projects_df, result_df):
112
  min_value=0,
113
  max_value=100,
114
  ),
115
- #"similarity": st.column_config.TextColumn(
116
- # "Similarity",
117
- # help="Similarity",
118
- # disabled=True,
119
- # width="small"
120
- #),
121
  "iati_id": st.column_config.TextColumn(
122
  "IATI ID",
123
  help="IATI Project ID",
 
1
  import streamlit as st
2
+
3
+
4
+ """
5
+ Result table of the Single Project Matching
6
+ """
7
+
8
 
9
  def show_single_table(selected_project_index, projects_df, result_df):
10
+
11
+ """
12
+ TODO: Add this to preprocessing
13
 
14
+ """
15
  result_df['crs_3_code_list'] = result_df['crs_3_name'].apply(
16
  lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";")[:-1])
17
  )
 
34
  lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
35
  )
36
 
37
+ # Display the selected project and its info
38
  st.subheader("Reference Project")
39
  st.dataframe(
40
  sel_p_row[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "country_flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
 
102
  )
103
 
104
 
105
+ # Display the projects similar to the selected project
106
  if len(result_df) == 0:
107
  st.write("No results found!")
108
  else:
 
123
  min_value=0,
124
  max_value=100,
125
  ),
 
 
 
 
 
 
126
  "iati_id": st.column_config.TextColumn(
127
  "IATI ID",
128
  help="IATI Project ID",