Spaces:

GIZ
/

Development-Project-Synergy-Finder

Running on CPU Upgrade

App Files Files Community

Development-Project-Synergy-Finder / functions /single_project_matching.py

annikwag

Upload 1639 files

883557f verified 4 months ago

raw

history blame

1.88 kB

	import numpy as np
	from scipy.sparse import csr_matrix

	"""
	Function to find similar projects for single project matching.

	Single Project Matching lets you choose an individual project by either its
	IATI ID or its title, and then returns the top x projects within a filtered
	set (filtered_df) that most closely resemble the selected one (p_index).
	"""

	def find_similar(p_index, similarity_matrix, filtered_df, top_x):
	    """
	    Return the projects in ``filtered_df`` most similar to one selected project.

	    p_index: row index of the selected project in ``similarity_matrix``
	    similarity_matrix: matrix with similarities of all projects; anything that
	        is not already a ``csr_matrix`` is converted to one
	    filtered_df: df with filter applied; its index labels are used directly as
	        column positions in ``similarity_matrix``
	    top_x: maximum number of similar projects to return; a value <= 0 yields
	        an empty result

	    Returns a new DataFrame (``filtered_df`` itself is not modified) holding
	    at most ``top_x`` rows of ``filtered_df``, ordered by descending
	    similarity, with an added ``similarity`` column. Rows whose similarity is
	    not greater than 0 are dropped.

	    NOTE(review): the selected project is not excluded, so if ``p_index`` is
	    part of ``filtered_df`` it can appear in the result -- confirm that the
	    caller expects this.
	    """
	    # convert npz sparse matrix (or any other matrix-like input) into csr matrix
	    if not isinstance(similarity_matrix, csr_matrix):
	        similarity_matrix = csr_matrix(similarity_matrix)

	    # index labels of filtered_df double as column positions in the matrix
	    filtered_indices = filtered_df.index.tolist()

	    # Nothing requested or nothing to search. The explicit guard matters for
	    # top_x == 0, because the slice [-0:] would select *every* project.
	    if top_x <= 0 or not filtered_indices:
	        empty_scores = np.empty(0, dtype=similarity_matrix.dtype)
	        return filtered_df.iloc[:0].assign(similarity=empty_scores)

	    # take the selected project's row first, then keep only the filtered
	    # columns; this avoids column-slicing the whole similarity matrix
	    selected_row = similarity_matrix.getrow(p_index)
	    project_row = selected_row[:, filtered_indices].toarray().ravel()

	    # positions (within filtered_df) of the top_x highest scores, best first
	    top_positions = np.argsort(project_row)[-top_x:][::-1]
	    top_values = project_row[top_positions]

	    # Select by position so duplicate index labels cannot multiply rows, and
	    # copy explicitly so that adding the column never touches filtered_df.
	    result_df = filtered_df.iloc[top_positions].copy()
	    result_df['similarity'] = top_values

	    # keep only rows with a similarity score greater than 0
	    return result_df[result_df['similarity'] > 0]