legend1234's picture
Initial draft (#252)
bbcd4a0 unverified
# The Selector library provides a set of tools for selecting a
# subset of the dataset and computing diversity.
#
# Copyright (C) 2023 The QC-Devs Community
#
# This file is part of Selector.
#
# Selector is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# Selector is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
import streamlit as st
import numpy as np
import pandas as pd
import json
import os
from sklearn.metrics import pairwise_distances
def set_page_config(page_title, page_icon):
    """Configure the Streamlit page title and icon.

    Parameters
    ----------
    page_title : str
        Title forwarded to ``st.set_page_config``.
    page_icon : str
        File name of the icon. It is resolved against the ``assets``
        directory located one level above this module's directory.
    """
    # Icon files live in "<directory of this file>/../assets".
    module_dir = os.path.dirname(os.path.abspath(__file__))
    icon_path = os.path.join(module_dir, "..", "assets", page_icon)
    st.set_page_config(page_title=page_title, page_icon=icon_path)
def display_sidebar_info(title, description, references):
    """Fill the sidebar with a description box and a references box.

    Parameters
    ----------
    title : str
        Text shown as the sidebar header.
    description : str
        Text shown in the first info box, below the header.
    references : str
        Text shown in the second info box, below a "References" title.
    """
    sidebar = st.sidebar
    sidebar.header(title)
    sidebar.info(description)
    sidebar.title("References")
    sidebar.info(references)
def load_matrix(matrix_file):
    """Load a matrix from an uploaded file and preview it in the app.

    Parameters
    ----------
    matrix_file : file-like
        Object with a ``name`` attribute that can be read by pandas/NumPy.
        Supported extensions are ``.csv``, ``.xlsx``, ``.npz`` and ``.npy``
        (matched case-insensitively).

    Returns
    -------
    pandas.DataFrame or numpy.ndarray or None
        A DataFrame for ``.csv``/``.xlsx`` input, an array for
        ``.npz``/``.npy`` input, or ``None`` when loading fails (the failure
        is reported with ``st.error``).
    """
    try:
        file_name = matrix_file.name.lower()

        if file_name.endswith((".csv", ".xlsx")):
            # Pass the callback itself: ``clear_results()`` would call it
            # immediately and hand its return value (None) to ``on_change``.
            has_header = st.checkbox(
                "Does the file have a header?",
                key="header_option",
                on_change=clear_results,
            )
            st.warning("Warning: This will affect the final output if not specified correctly.")

            # header=0 uses the first row as column names; None treats every
            # row as data.
            header = 0 if has_header else None
            if file_name.endswith(".xlsx"):
                # Excel workbooks are not text; they need the Excel reader.
                matrix = pd.read_excel(matrix_file, header=header)
            else:
                matrix = pd.read_csv(matrix_file, header=header)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix.values)
        elif file_name.endswith(".npz"):
            # The archive is closed once the chosen array has been read.
            with np.load(matrix_file) as matrix_data:
                array_names = matrix_data.files
                selected_array = st.selectbox("Select the array to use", array_names)
                matrix = matrix_data[selected_array]
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        elif file_name.endswith(".npy"):
            matrix = np.load(matrix_file)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        else:
            # Previously this fell through to ``return matrix`` with the name
            # unbound; report the real cause instead.
            raise ValueError(f"unsupported file format: {matrix_file.name}")

        return matrix
    except Exception as e:
        st.error(f'An error occurred while loading matrix file: {e}')
        return None
def load_labels(label_file):
    """Load cluster labels from an uploaded file and preview them in the app.

    Parameters
    ----------
    label_file : file-like
        Object with a ``name`` attribute that can be read by pandas.
        Supported extensions are ``.csv`` and ``.xlsx`` (matched
        case-insensitively).

    Returns
    -------
    numpy.ndarray or None
        Flattened array of the file's values, or ``None`` when loading fails
        (the failure is reported with ``st.error``).
    """
    try:
        file_name = label_file.name.lower()
        if not file_name.endswith((".csv", ".xlsx")):
            # Previously this fell through to ``return labels`` with the name
            # unbound; report the real cause instead.
            raise ValueError(f"unsupported file format: {label_file.name}")

        # Pass the callback itself: ``clear_results()`` would call it
        # immediately and hand its return value (None) to ``on_change``.
        has_header = st.checkbox(
            "Does the file have a header?",
            key="label_header_option",
            on_change=clear_results,
        )
        st.warning("Warning: This will affect the final output if not specified correctly.")

        # header=0 uses the first row as column names; None treats every row
        # as data.
        header = 0 if has_header else None
        if file_name.endswith(".xlsx"):
            # Excel workbooks are not text; they need the Excel reader.
            frame = pd.read_excel(label_file, header=header)
        else:
            frame = pd.read_csv(label_file, header=header)

        labels = frame.values.flatten()
        st.write("Cluster labels shape:", labels.shape)
        st.write(labels)
        return labels
    except Exception as e:
        st.error(f'An error occurred while loading cluster label file: {e}')
        return None
def run_algorithm(selector, matrix, num_points, labels):
    """Run a selector on the matrix and store the selection in session state.

    Parameters
    ----------
    selector : object
        Object exposing ``select(X, size=..., labels=...)``.
    matrix : pandas.DataFrame or array-like
        For a DataFrame, the first column is used as element names and the
        numeric columns are passed to the selector. Any other array-like
        input is passed to the selector as is, with row positions used as
        element names.
    num_points : int
        Passed to the selector as ``size``.
    labels : array-like or None
        Passed to the selector as ``labels`` when not ``None``.

    Returns
    -------
    list of tuple or None
        ``(element_name, index)`` pairs for the selected rows, also stored
        under ``st.session_state['selected_ids']``. ``None`` when the
        selection fails (the failure is reported with ``st.error``).
    """
    try:
        if matrix is None:
            raise ValueError("no matrix has been loaded")

        if isinstance(matrix, pd.DataFrame):
            # Assumes the first column holds the element names.
            element_names = matrix.iloc[:, 0].values
            numeric_matrix = matrix.select_dtypes(include=[np.number]).values
        else:
            # Plain arrays (e.g. loaded from .npy/.npz) have no name column,
            # so each row is named by its position.
            numeric_matrix = np.asarray(matrix)
            element_names = np.arange(numeric_matrix.shape[0])

        select_kwargs = {"size": num_points}
        if labels is not None:
            select_kwargs["labels"] = labels
        selected = selector.select(numeric_matrix, **select_kwargs)

        selected_ids = [(element_names[i], i) for i in selected]
        st.session_state['selected_ids'] = selected_ids
        return selected_ids
    except Exception as e:
        st.error(f"An error occurred while running the algorithm: {e}")
    return None
def _as_builtin(value):
    """Return ``value`` as a plain Python scalar if it is a NumPy scalar."""
    return value.item() if isinstance(value, np.generic) else value


def export_results(selected_ids):
    """Offer the selection for download as CSV or JSON.

    Parameters
    ----------
    selected_ids : list of tuple
        ``(element, index)`` pairs. Both export formats write the first item
        under ``"Element"`` and the second under ``"Index"``.
    """
    export_format = st.selectbox("Select export format", ["CSV", "JSON"], key="export_format")
    if export_format == "CSV":
        csv_data = pd.DataFrame(selected_ids, columns=["Element", "Index"])
        csv = csv_data.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download as CSV",
            data=csv,
            file_name='selected_indices.csv',
            mime='text/csv',
        )
    else:
        # Tuples are (element, index); unpack in that order so the JSON
        # fields agree with the CSV columns. NumPy scalars are converted
        # because ``json.dumps`` cannot serialize them.
        records = [
            {"Element": _as_builtin(element), "Index": _as_builtin(index)}
            for element, index in selected_ids
        ]
        json_data = json.dumps(records)
        st.download_button(
            label="Download as JSON",
            data=json_data,
            file_name='selected_indices.json',
            mime='application/json',
        )
def clear_results():
    """Remove the stored selection and selector from the session state.

    Deletes the ``'selected_ids'`` and ``'selector'`` entries of
    ``st.session_state`` when they are present; missing entries are ignored.
    """
    for key in ('selected_ids', 'selector'):
        if key in st.session_state:
            del st.session_state[key]