# The Selector library provides a set of tools for selecting a
# subset of the dataset and computing diversity.
#
# Copyright (C) 2023 The QC-Devs Community
#
# This file is part of Selector.
#
# Selector is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# Selector is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
"""Streamlit helpers for loading data, running a selector and exporting results."""

import json
import os

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics import pairwise_distances

# Extensions that are read as tables through pandas.
_TABLE_EXTENSIONS = (".csv", ".xlsx")


def set_page_config(page_title, page_icon):
    """Configure the Streamlit page title and icon.

    Parameters
    ----------
    page_title : str
        Title passed to ``st.set_page_config``.
    page_icon : str
        File name of the icon, resolved relative to the ``assets`` directory
        that sits next to this file's parent directory.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    assets_dir = os.path.join(current_dir, "..", "assets")
    st.set_page_config(
        page_title=page_title,
        page_icon=os.path.join(assets_dir, page_icon),
    )


def display_sidebar_info(title, description, references):
    """Render a header, a description box and a references box in the sidebar.

    Parameters
    ----------
    title : str
        Text of the sidebar header.
    description : str
        Text shown in the first info box.
    references : str
        Text shown in the info box under the "References" title.
    """
    st.sidebar.header(title)
    st.sidebar.info(description)
    st.sidebar.title("References")
    st.sidebar.info(references)


def _read_table(uploaded_file, has_header):
    """Read an uploaded ``.csv`` or ``.xlsx`` file into a DataFrame."""
    header = 0 if has_header else None
    if uploaded_file.name.lower().endswith(".xlsx"):
        # Excel workbooks are not text; ``read_csv`` cannot parse them.
        return pd.read_excel(uploaded_file, header=header)
    return pd.read_csv(uploaded_file, header=header)


def load_matrix(matrix_file):
    """Load a matrix from an uploaded ``.csv``, ``.xlsx``, ``.npz`` or ``.npy`` file.

    The loaded data and its shape are echoed to the page. For tabular files a
    checkbox asks whether the first row is a header; for ``.npz`` archives a
    select box chooses which stored array to use.

    Parameters
    ----------
    matrix_file : file-like
        Uploaded file object exposing a ``name`` attribute.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray or None
        A DataFrame for tabular files, an array for NumPy files, or ``None``
        when loading fails (the error is shown with ``st.error``).
    """
    try:
        file_name = matrix_file.name.lower()
        if file_name.endswith(_TABLE_EXTENSIONS):
            # Pass the function itself: calling it here would clear the
            # results on every rerun and register ``None`` as the callback.
            has_header = st.checkbox(
                "Does the file have a header?",
                key="header_option",
                on_change=clear_results,
            )
            st.warning("Warning: This will affect the final output if not specified correctly.")
            matrix = _read_table(matrix_file, has_header)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix.values)
        elif file_name.endswith(".npz"):
            matrix_data = np.load(matrix_file)
            # Select the array in the .npz file
            selected_array = st.selectbox("Select the array to use", matrix_data.files)
            matrix = matrix_data[selected_array]
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        elif file_name.endswith(".npy"):
            matrix = np.load(matrix_file)
            st.write("Matrix shape:", matrix.shape)
            st.write(matrix)
        else:
            # Report the real problem instead of an unbound-variable error.
            raise ValueError(f"unsupported file format: {matrix_file.name}")
        return matrix
    except Exception as e:  # UI boundary: show the failure instead of crashing the page
        st.error(f'An error occurred while loading matrix file: {e}')
        return None


def load_labels(label_file):
    """Load cluster labels from an uploaded ``.csv`` or ``.xlsx`` file.

    A checkbox asks whether the first row is a header. The table is flattened
    to one dimension and echoed to the page together with its shape.

    Parameters
    ----------
    label_file : file-like
        Uploaded file object exposing a ``name`` attribute.

    Returns
    -------
    numpy.ndarray or None
        Flattened label values, or ``None`` when loading fails (the error is
        shown with ``st.error``).
    """
    try:
        if not label_file.name.lower().endswith(_TABLE_EXTENSIONS):
            # Report the real problem instead of an unbound-variable error.
            raise ValueError(f"unsupported file format: {label_file.name}")
        # Pass the function itself, not the result of calling it.
        has_header = st.checkbox(
            "Does the file have a header?",
            key="label_header_option",
            on_change=clear_results,
        )
        st.warning("Warning: This will affect the final output if not specified correctly.")
        labels = _read_table(label_file, has_header).values.flatten()
        st.write("Cluster labels shape:", labels.shape)
        st.write(labels)
        return labels
    except Exception as e:  # UI boundary: show the failure instead of crashing the page
        st.error(f'An error occurred while loading cluster label file: {e}')
        return None


def run_algorithm(selector, matrix, num_points, labels):
    """Run a selector on the matrix and store the selection in session state.

    Parameters
    ----------
    selector : object
        Object exposing ``select(X, size=..., labels=...)`` that returns an
        iterable of row indices.
    matrix : pandas.DataFrame or array-like
        Input data. The first column supplies the element names and every
        numeric column is passed to the selector. Arrays are wrapped in a
        DataFrame, which matches what a header-less numeric CSV produces.
    num_points : int
        Number of points to select; forwarded as ``size``.
    labels : array-like or None
        Cluster labels; forwarded as ``labels`` only when not ``None``.

    Returns
    -------
    list of tuple or None
        ``(element_name, index)`` pairs, also stored under
        ``st.session_state['selected_ids']``; ``None`` when the run fails
        (the error is shown with ``st.error``).
    """
    try:
        table = matrix if isinstance(matrix, pd.DataFrame) else pd.DataFrame(matrix)
        # Assuming the first column contains the names
        element_names = table.iloc[:, 0].values
        numeric_matrix = table.select_dtypes(include=[np.number]).values

        select_kwargs = {"size": num_points}
        if labels is not None:
            select_kwargs["labels"] = labels
        chosen = selector.select(numeric_matrix, **select_kwargs)

        selected_ids = [(element_names[i], i) for i in chosen]
        st.session_state['selected_ids'] = selected_ids
        return selected_ids
    except Exception as e:  # UI boundary: show the failure instead of crashing the page
        st.error(f"An error occurred while running the algorithm: {e}")
        return None


def _to_builtin(value):
    """Convert a NumPy scalar to the matching built-in Python scalar."""
    return value.item() if isinstance(value, np.generic) else value


def export_results(selected_ids):
    """Offer the selection for download as CSV or JSON.

    Parameters
    ----------
    selected_ids : list of tuple
        ``(element_name, index)`` pairs as produced by ``run_algorithm``.
    """
    export_format = st.selectbox("Select export format", ["CSV", "JSON"], key="export_format")
    if export_format == "CSV":
        csv_data = pd.DataFrame(selected_ids, columns=["Element", "Index"])
        csv = csv_data.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download as CSV",
            data=csv,
            file_name='selected_indices.csv',
            mime='text/csv',
        )
    else:
        # Tuples are (element, index); NumPy scalars are converted because
        # ``json.dumps`` rejects NumPy integer types.
        records = [
            {"Element": _to_builtin(elem), "Index": _to_builtin(idx)}
            for elem, idx in selected_ids
        ]
        json_data = json.dumps(records)
        st.download_button(
            label="Download as JSON",
            data=json_data,
            file_name='selected_indices.json',
            mime='application/json',
        )


# Function to clear selected indices from session state
def clear_results():
    """Remove the stored selection and selector from the session state."""
    for key in ('selected_ids', 'selector'):
        if key in st.session_state:
            del st.session_state[key]