Spaces:

QCDevs
/

Selector_GSoC

Sleeping

App Files Files Community

Selector_GSoC / streamlit_app /utils.py

legend1234

Initial draft (#252)

bbcd4a0 unverified 11 months ago

raw

history blame contribute delete

5.86 kB

	# The Selector library provides a set of tools for selecting a
	# subset of the dataset and computing diversity.
	#
	# Copyright (C) 2023 The QC-Devs Community
	#
	# This file is part of Selector.
	#
	# Selector is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License
	# as published by the Free Software Foundation; either version 3
	# of the License, or (at your option) any later version.
	#
	# Selector is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, see <http://www.gnu.org/licenses/>
	#
	# --

	import streamlit as st
	import numpy as np
	import pandas as pd
	import json
	import os

	from sklearn.metrics import pairwise_distances

	def set_page_config(page_title, page_icon):
	    """Configure the Streamlit page title and icon.

	    Parameters
	    ----------
	    page_title : str
	        Title forwarded to ``st.set_page_config``.
	    page_icon : str
	        Icon file name, resolved relative to the ``assets`` directory that
	        sits next to this module's parent directory.
	    """
	    module_dir = os.path.dirname(os.path.abspath(__file__))
	    # The icon lives in "<module_dir>/../assets/<page_icon>".
	    icon_path = os.path.join(module_dir, "..", "assets", page_icon)
	    st.set_page_config(page_title=page_title, page_icon=icon_path)

	def display_sidebar_info(title, description, references):
	    """Render the sidebar: a header, a description box and a references box.

	    Parameters
	    ----------
	    title : str
	        Text shown as the sidebar header.
	    description : str
	        Text shown in the first info box.
	    references : str
	        Text shown in the info box under the "References" title.
	    """
	    sidebar = st.sidebar
	    sidebar.header(title)
	    sidebar.info(description)
	    sidebar.title("References")
	    sidebar.info(references)

	# Load data from matrix file
	def load_matrix(matrix_file):
	    """Load a matrix from an uploaded file and preview it in the app.

	    The loader is chosen from the file name extension: ``.csv`` and ``.xlsx``
	    are read with pandas (after asking whether the first row is a header),
	    ``.npz`` lets the user pick one of the stored arrays, and ``.npy`` is
	    loaded directly.

	    Parameters
	    ----------
	    matrix_file : file-like
	        Object exposing a ``name`` attribute and readable by
	        ``pd.read_csv`` / ``pd.read_excel`` / ``np.load`` (presumably a
	        Streamlit uploaded file -- confirm against the caller).

	    Returns
	    -------
	    pandas.DataFrame or numpy.ndarray or None
	        A DataFrame for ``.csv``/``.xlsx``, an array for ``.npz``/``.npy``,
	        or ``None`` when the extension is unsupported or loading fails (an
	        error message is shown in the app in that case).
	    """
	    try:
	        file_name = matrix_file.name

	        if file_name.endswith((".csv", ".xlsx")):
	            # Pass the function itself: writing ``clear_results()`` would run
	            # it on every script execution and register ``None`` as callback.
	            has_header = st.checkbox(
	                "Does the file have a header?",
	                key="header_option",
	                on_change=clear_results,
	            )
	            st.warning("Warning: This will affect the final output if not specified correctly.")

	            # Row 0 holds the column names when a header is present.
	            header_row = 0 if has_header else None
	            if file_name.endswith(".xlsx"):
	                # Excel workbooks are not text; read_csv cannot parse them.
	                matrix = pd.read_excel(matrix_file, header=header_row)
	            else:
	                matrix = pd.read_csv(matrix_file, header=header_row)
	            st.write("Matrix shape:", matrix.shape)
	            st.write(matrix.values)
	        elif file_name.endswith(".npz"):
	            archive = np.load(matrix_file)
	            # An .npz archive may hold several arrays; let the user pick one.
	            selected_array = st.selectbox("Select the array to use", archive.files)
	            matrix = archive[selected_array]
	            st.write("Matrix shape:", matrix.shape)
	            st.write(matrix)
	        elif file_name.endswith(".npy"):
	            matrix = np.load(matrix_file)
	            st.write("Matrix shape:", matrix.shape)
	            st.write(matrix)
	        else:
	            # Report the real problem instead of an unbound-variable error.
	            st.error(
	                'An error occurred while loading matrix file: '
	                f'unsupported file type "{file_name}"'
	            )
	            return None

	        return matrix
	    except Exception as e:
	        # UI boundary: surface any loading problem to the user, do not crash.
	        st.error(f'An error occurred while loading matrix file: {e}')
	        return None

	def load_labels(label_file):
	    """Load cluster labels from an uploaded ``.csv`` or ``.xlsx`` file.

	    The user is asked whether the file has a header row; the table is then
	    flattened to a one-dimensional array and previewed in the app.

	    Parameters
	    ----------
	    label_file : file-like
	        Object exposing a ``name`` attribute and readable by
	        ``pd.read_csv`` / ``pd.read_excel`` (presumably a Streamlit uploaded
	        file -- confirm against the caller).

	    Returns
	    -------
	    numpy.ndarray or None
	        Flattened label values, or ``None`` when the extension is
	        unsupported or loading fails (an error message is shown in the app).
	    """
	    try:
	        file_name = label_file.name

	        if not file_name.endswith((".csv", ".xlsx")):
	            # Report the real problem instead of an unbound-variable error.
	            st.error(
	                'An error occurred while loading cluster label file: '
	                f'unsupported file type "{file_name}"'
	            )
	            return None

	        # Pass the function itself: writing ``clear_results()`` would run it
	        # on every script execution and register ``None`` as callback.
	        has_header = st.checkbox(
	            "Does the file have a header?",
	            key="label_header_option",
	            on_change=clear_results,
	        )
	        st.warning("Warning: This will affect the final output if not specified correctly.")

	        # Row 0 holds the column names when a header is present.
	        header_row = 0 if has_header else None
	        if file_name.endswith(".xlsx"):
	            # Excel workbooks are not text; read_csv cannot parse them.
	            table = pd.read_excel(label_file, header=header_row)
	        else:
	            table = pd.read_csv(label_file, header=header_row)

	        labels = table.values.flatten()
	        st.write("Cluster labels shape:", labels.shape)
	        st.write(labels)
	        return labels
	    except Exception as e:
	        # UI boundary: surface any loading problem to the user, do not crash.
	        st.error(f'An error occurred while loading cluster label file: {e}')
	        return None

	def run_algorithm(selector, matrix, num_points, labels):
	    """Run a selection algorithm and store the result in the session state.

	    Parameters
	    ----------
	    selector : object
	        Object providing ``select(X, size=..., labels=...)`` that returns an
	        iterable of row indices.
	    matrix : pandas.DataFrame or array-like
	        Input data. The first column is used as the element names and all
	        numeric columns are passed to the selector. Inputs that are not a
	        DataFrame (e.g. arrays loaded from ``.npy``/``.npz``) are wrapped in
	        one, so their first column doubles as the element name.
	    num_points : int
	        Number of points to select, forwarded as ``size``.
	    labels : array-like or None
	        Optional cluster labels; forwarded only when not ``None``.

	    Returns
	    -------
	    list of tuple or None
	        ``(element_name, index)`` pairs for the selected rows, also stored
	        under ``st.session_state['selected_ids']``; ``None`` if the run
	        failed (an error message is shown in the app).
	    """
	    try:
	        # Bare arrays have no ``.iloc``/``.select_dtypes``; wrap them first.
	        frame = matrix if isinstance(matrix, pd.DataFrame) else pd.DataFrame(matrix)

	        # First column is assumed to carry the element names.
	        element_names = frame.iloc[:, 0].values
	        numeric_matrix = frame.select_dtypes(include=[np.number]).values

	        select_kwargs = {"size": num_points}
	        if labels is not None:
	            select_kwargs["labels"] = labels
	        chosen = selector.select(numeric_matrix, **select_kwargs)

	        selected_ids = [(element_names[i], i) for i in chosen]
	        st.session_state['selected_ids'] = selected_ids
	        return selected_ids
	    except Exception as e:
	        # UI boundary: ValueError and every other failure get the same report.
	        st.error(f"An error occurred while running the algorithm: {e}")
	        return None

	def _json_default(value):
	    """Convert NumPy scalars to plain Python values for ``json.dumps``."""
	    if isinstance(value, np.generic):
	        return value.item()
	    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


	def export_results(selected_ids):
	    """Offer the selected elements for download as CSV or JSON.

	    Parameters
	    ----------
	    selected_ids : list of tuple
	        ``(element, index)`` pairs, in that order. Both export formats use
	        the field names "Element" and "Index".
	    """
	    export_format = st.selectbox("Select export format", ["CSV", "JSON"], key="export_format")

	    if export_format == "CSV":
	        csv_data = pd.DataFrame(selected_ids, columns=["Element", "Index"])
	        csv = csv_data.to_csv(index=False).encode('utf-8')
	        st.download_button(
	            label="Download as CSV",
	            data=csv,
	            file_name='selected_indices.csv',
	            mime='text/csv',
	        )
	    else:
	        # Unpack in (element, index) order so the JSON fields match the CSV
	        # columns; the reverse order would swap the two values.
	        records = [{"Element": elem, "Index": idx} for elem, idx in selected_ids]
	        # NumPy integer scalars are not JSON serializable without a hook.
	        json_data = json.dumps(records, default=_json_default)
	        st.download_button(
	            label="Download as JSON",
	            data=json_data,
	            file_name='selected_indices.json',
	            mime='application/json',
	        )

	# Function to clear selected indices from session state
	def clear_results():
	    """Drop the stored selection and selector from the session state.

	    Removes the ``'selected_ids'`` and ``'selector'`` entries from
	    ``st.session_state`` when they are present; missing keys are ignored.
	    """
	    for stale_key in ('selected_ids', 'selector'):
	        if stale_key in st.session_state:
	            del st.session_state[stale_key]