Development-Project-Synergy-Finder / functions /single_project_matching.py
annikwag's picture
Upload 1639 files
883557f verified
raw
history blame
No virus
1.88 kB
import numpy as np
from scipy.sparse import csr_matrix
"""
Find similar projects for single project matching.
Single project matching lets you choose an individual project by its
IATI ID or title and then shows the top x projects within a filter (filtered_df) that
most closely resemble the selected one (p_index).
"""
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """
    Return the top_x projects of filtered_df most similar to project p_index.

    p_index: row position of the selected project in similarity_matrix
    similarity_matrix: matrix with similarities of all projects; anything
        that is not already a csr_matrix is converted to one
    filtered_df: df with filter applied; its index labels are used as column
        positions of similarity_matrix
    top_x: top x project which should be displayed; values <= 0 give an
        empty result

    Returns a new DataFrame holding the selected rows of filtered_df, ordered
    by descending similarity, with an added 'similarity' column. Rows whose
    similarity is not greater than 0 are dropped, so fewer than top_x rows
    may be returned. filtered_df itself is not modified.
    """
    # convert npz sparse matrix into csr matrix
    if not isinstance(similarity_matrix, csr_matrix):
        similarity_matrix = csr_matrix(similarity_matrix)

    # index labels of the filtered projects double as matrix column positions
    filtered_indices = filtered_df.index.tolist()

    # Take the selected project's row first and only then restrict it to the
    # filtered columns; this avoids column-slicing the whole sparse matrix.
    full_row = similarity_matrix.getrow(p_index).toarray().ravel()
    project_row = full_row[filtered_indices]

    # Positions (within filtered_df) of the top_x highest similarity scores,
    # best first. Guard top_x <= 0 explicitly: a slice such as [-0:] would
    # otherwise select every element instead of none.
    if top_x <= 0:
        sorted_positions = np.empty(0, dtype=np.intp)
    else:
        sorted_positions = np.argsort(project_row)[-top_x:][::-1]

    # Position i of project_row corresponds to row i of filtered_df, so select
    # positionally; this also works when index labels are duplicated.
    # assign() returns a new frame, so no chained-assignment warning is raised.
    result_df = filtered_df.iloc[sorted_positions].assign(
        similarity=project_row[sorted_positions]
    )

    # keep only rows with a similarity score greater than 0
    return result_df[result_df["similarity"] > 0]