Spaces:
Sleeping
Sleeping
# The Selector library provides a set of tools for selecting a | |
# subset of the dataset and computing diversity. | |
# | |
# Copyright (C) 2023 The QC-Devs Community | |
# | |
# This file is part of Selector. | |
# | |
# Selector is free software; you can redistribute it and/or | |
# modify it under the terms of the GNU General Public License | |
# as published by the Free Software Foundation; either version 3 | |
# of the License, or (at your option) any later version. | |
# | |
# Selector is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, see <http://www.gnu.org/licenses/> | |
# | |
# -- | |
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import json | |
import os | |
from sklearn.metrics import pairwise_distances | |
def set_page_config(page_title, page_icon):
    """Configure the Streamlit page title and icon.

    Parameters
    ----------
    page_title : str
        Title forwarded to ``st.set_page_config``.
    page_icon : str
        File name of the icon; it is resolved against the ``assets``
        directory that sits one level above this module's directory.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # Single join call; equivalent to joining the assets directory first and
    # the icon name afterwards.
    icon_path = os.path.join(module_dir, "..", "assets", page_icon)
    st.set_page_config(page_title=page_title, page_icon=icon_path)
def display_sidebar_info(title, description, references):
    """Populate the sidebar with a description and a references section.

    Parameters
    ----------
    title : str
        Text shown as the sidebar header.
    description : str
        Text shown in the first info box.
    references : str
        Text shown in the info box under the "References" title.
    """
    sidebar = st.sidebar
    # Description section.
    sidebar.header(title)
    sidebar.info(description)
    # References section.
    sidebar.title("References")
    sidebar.info(references)
def load_matrix(matrix_file):
    """Load a matrix from an uploaded file and preview it on the page.

    The format is chosen from the suffix of ``matrix_file.name``:

    * ``.csv`` / ``.xlsx`` -- read with pandas; a checkbox lets the user say
      whether the first row is a header.
    * ``.npz`` -- the user picks one of the stored arrays from a select box.
    * ``.npy`` -- loaded directly with NumPy.

    Parameters
    ----------
    matrix_file : file-like
        Object with a ``name`` attribute that pandas / NumPy can read from
        (presumably a Streamlit uploaded file -- the caller is not visible
        here).

    Returns
    -------
    pandas.DataFrame or numpy.ndarray or None
        A ``DataFrame`` for ``.csv`` / ``.xlsx`` files, an array for
        ``.npy`` / ``.npz`` files, or ``None`` when loading fails or the
        format is not supported (an error message is displayed in that case).
    """
    try:
        file_name = matrix_file.name
        if file_name.endswith((".csv", ".xlsx")):
            # Pass the callback itself. Calling ``clear_results()`` here would
            # run it on every script rerun and register ``None`` as callback.
            has_header = st.checkbox(
                "Does the file have a header?",
                key="header_option",
                on_change=clear_results,
            )
            st.warning("Warning: This will affect the final output if not specified correctly.")
            # ``header=0`` is what pandas infers by default when no column
            # names are supplied; ``None`` treats every row as data.
            header = 0 if has_header else None
            # Excel workbooks are not text and cannot be parsed by read_csv.
            reader = pd.read_excel if file_name.endswith(".xlsx") else pd.read_csv
            matrix = reader(matrix_file, header=header)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix.values)
        elif file_name.endswith(".npz"):
            matrix_data = np.load(matrix_file)
            # An .npz archive may hold several arrays; let the user choose one.
            selected_array = st.selectbox("Select the array to use", matrix_data.files)
            matrix = matrix_data[selected_array]
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        elif file_name.endswith(".npy"):
            matrix = np.load(matrix_file)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        else:
            # Report the real problem instead of an unbound-variable error.
            raise ValueError(f"unsupported file format: {file_name!r}")
        return matrix
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        st.error(f'An error occurred while loading matrix file: {e}')
        return None
def load_labels(label_file):
    """Load cluster labels from an uploaded file and preview them.

    Only ``.csv`` and ``.xlsx`` files are supported; a checkbox lets the user
    say whether the first row is a header. The table is flattened into a
    one-dimensional array.

    Parameters
    ----------
    label_file : file-like
        Object with a ``name`` attribute that pandas can read from
        (presumably a Streamlit uploaded file -- the caller is not visible
        here).

    Returns
    -------
    numpy.ndarray or None
        Flattened label values, or ``None`` when loading fails or the format
        is not supported (an error message is displayed in that case).
    """
    try:
        file_name = label_file.name
        if not file_name.endswith((".csv", ".xlsx")):
            # Report the real problem instead of an unbound-variable error.
            raise ValueError(f"unsupported file format: {file_name!r}")

        # Pass the callback itself. Calling ``clear_results()`` here would
        # run it on every script rerun and register ``None`` as callback.
        has_header = st.checkbox(
            "Does the file have a header?",
            key="label_header_option",
            on_change=clear_results,
        )
        st.warning("Warning: This will affect the final output if not specified correctly.")
        # ``header=0`` is what pandas infers by default when no column names
        # are supplied; ``None`` treats every row as data.
        header = 0 if has_header else None
        # Excel workbooks are not text and cannot be parsed by read_csv.
        reader = pd.read_excel if file_name.endswith(".xlsx") else pd.read_csv
        labels = reader(label_file, header=header).values.flatten()
        st.write("Cluster labels shape:", labels.shape)
        st.write(labels)
        return labels
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        st.error(f'An error occurred while loading cluster label file: {e}')
        return None
def run_algorithm(selector, matrix, num_points, labels):
    """Run a selection algorithm and store the result in the session state.

    Parameters
    ----------
    selector : object
        Object exposing ``select(X, size=..., labels=...)``.
    matrix : pandas.DataFrame or array-like
        For a ``DataFrame`` the first column is taken as the element names
        and only the numeric columns are passed to the selector. Any other
        input (e.g. the NumPy array loaded from a ``.npy`` / ``.npz`` file)
        is passed through as-is and each row is named by its row index.
    num_points : int
        Number of points to select, forwarded as ``size``.
    labels : array-like or None
        Optional cluster labels; forwarded only when not ``None``.

    Returns
    -------
    list of tuple or None
        ``(element_name, index)`` pairs for the selected rows, also stored
        under ``st.session_state['selected_ids']``. ``None`` when the
        algorithm fails (an error message is displayed in that case).
    """
    try:
        if isinstance(matrix, pd.DataFrame):
            # Assumes the first column contains the element names.
            element_names = matrix.iloc[:, 0].values
            numeric_matrix = matrix.select_dtypes(include=[np.number]).values
        else:
            # Plain arrays have no ``.iloc`` and no name column; fall back to
            # the row index as the element name so that they can be used too.
            numeric_matrix = np.asarray(matrix)
            element_names = np.arange(numeric_matrix.shape[0])

        # Only pass ``labels`` to selectors when the user supplied them.
        select_kwargs = {"size": num_points}
        if labels is not None:
            select_kwargs["labels"] = labels
        chosen = selector.select(numeric_matrix, **select_kwargs)

        selected_ids = [(element_names[i], i) for i in chosen]
        st.session_state['selected_ids'] = selected_ids
        return selected_ids
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        st.error(f"An error occurred while running the algorithm: {e}")
    return None
def export_results(selected_ids):
    """Offer the selected elements for download as CSV or JSON.

    Parameters
    ----------
    selected_ids : iterable of tuple
        ``(element_name, index)`` pairs. Both export formats label the first
        item "Element" and the second item "Index".
    """

    def _to_builtin(value):
        # ``json`` cannot serialise NumPy scalars (e.g. ``np.int64``), so
        # convert them to the matching built-in Python type.
        return value.item() if isinstance(value, np.generic) else value

    export_format = st.selectbox("Select export format", ["CSV", "JSON"], key="export_format")
    if export_format == "CSV":
        csv_frame = pd.DataFrame(selected_ids, columns=["Element", "Index"])
        csv_bytes = csv_frame.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download as CSV",
            data=csv_bytes,
            file_name='selected_indices.csv',
            mime='text/csv',
        )
    else:
        # Unpack in (element, index) order so the JSON fields agree with the
        # CSV columns.
        records = [
            {"Element": _to_builtin(elem), "Index": _to_builtin(idx)}
            for elem, idx in selected_ids
        ]
        json_data = json.dumps(records)
        st.download_button(
            label="Download as JSON",
            data=json_data,
            file_name='selected_indices.json',
            mime='application/json',
        )
def clear_results():
    """Remove the stored selection results and selector from the session state.

    Keys that are not present are skipped.
    """
    for state_key in ('selected_ids', 'selector'):
        if state_key in st.session_state:
            del st.session_state[state_key]