"""Single Project Matching: find projects similar to one selected project.

Single Project Matching lets you choose an individual project (by IATI ID or
title) and returns the top x projects within a filtered set (``filtered_df``)
that most closely resemble the selected one (``p_index``).
"""

import numpy as np
from scipy.sparse import csr_matrix


def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """Return the ``top_x`` projects in ``filtered_df`` most similar to a project.

    Args:
        p_index: Row position of the selected project in ``similarity_matrix``.
        similarity_matrix: Matrix of pairwise project similarities. Anything
            that is not already a ``csr_matrix`` is converted to one.
        filtered_df: DataFrame with the filter applied. Its index labels are
            used as column positions into ``similarity_matrix``.
        top_x: Maximum number of similar projects to return. Values ``<= 0``
            yield an empty result.

    Returns:
        A new DataFrame holding the selected rows of ``filtered_df`` ordered
        by descending similarity, with an added ``similarity`` column. Rows
        whose similarity is not strictly positive are dropped, so fewer than
        ``top_x`` rows may be returned. ``filtered_df`` itself is not
        modified. The selected project is not excluded by this function.
    """
    # Nothing to rank: return an empty frame that still has the expected
    # columns. (Slicing with ``[-0:]`` would otherwise select *every* row.)
    if top_x <= 0 or len(filtered_df.index) == 0:
        return filtered_df.iloc[0:0].assign(similarity=np.array([], dtype=float))

    # Normalise the input (dense array, other sparse format, ...) to CSR.
    if not isinstance(similarity_matrix, csr_matrix):
        similarity_matrix = csr_matrix(similarity_matrix)

    # Take the selected project's row first, then keep only the columns of the
    # filtered projects. This yields the same values as column-slicing the
    # whole matrix but touches a single row only.
    full_row = similarity_matrix.getrow(p_index).toarray().ravel()
    column_positions = filtered_df.index.to_numpy()
    project_row = full_row[column_positions]

    # Positions (within filtered_df) of the top_x highest scores, best first.
    best_positions = np.argsort(project_row)[::-1][:top_x]

    # Build the result positionally; ``assign`` returns a new frame, so the
    # caller's DataFrame is never written to.
    result_df = filtered_df.iloc[best_positions].assign(
        similarity=project_row[best_positions]
    )

    # Keep only projects with a strictly positive similarity score.
    return result_df[result_df['similarity'] > 0]