import gradio as gr import pandas as pd import numpy as np import umap import json import matplotlib.pyplot as plt import os # import tempfile import scanpy as sc # import argparse import subprocess import sys # from evaluate import AnndataProcessor # from accelerate import Accelerator from io import BytesIO from sklearn.linear_model import LogisticRegression from huggingface_hub import hf_hub_download def load_and_predict_with_classifier(x, model_path, output_path, save): # Load the model parameters from the JSON file with open(model_path, 'r') as f: model_params = json.load(f) # Reconstruct the logistic regression model model_loaded = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000) model_loaded.coef_ = np.array(model_params["coef"]) model_loaded.intercept_ = np.array(model_params["intercept"]) model_loaded.classes_ = np.array(model_params["classes"]) # output predictions y_pred = model_loaded.predict(x) # Convert the array to a Pandas DataFrame if save: df = pd.DataFrame(y_pred, columns=["predicted_cell_type"]) df.to_csv(output_path, index=False, header=False) return y_pred def main(input_file_path, species, default_dataset): # Get the current working directory current_working_directory = os.getcwd() # Print the current working directory print("Current Working Directory:", current_working_directory) # clone and cd into UCE repo os.system('git clone https://github.com/minwoosun/UCE.git') os.chdir('/home/user/app/UCE') # Get the current working directory current_working_directory = os.getcwd() # Print the current working directory print("Current Working Directory:", current_working_directory) # Specify the path to the directory you want to add new_directory = "/home/user/app/UCE" # Add the directory to the Python path sys.path.append(new_directory) # Set default dataset path default_dataset_1_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="100_pbmcs_proc_subset.h5ad") default_dataset_2_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="1k_pbmcs_proc_subset.h5ad") # If the user selects a default dataset, use that instead of the uploaded file if default_dataset == "Default Dataset 1: PBMC 100 cells": input_file_path = default_dataset_1_path elif default_dataset == "Default Dataset 2: PBMC 1000 cells": input_file_path = default_dataset_2_path ############## # UCE # ############## from evaluate import AnndataProcessor from accelerate import Accelerator # # python eval_single_anndata.py --adata_path "./data/10k_pbmcs_proc.h5ad" --dir "./" --model_loc "minwoosun/uce-100m" # script_name = "/home/user/app/UCE/eval_single_anndata.py" # args = ["--adata_path", input_file_path, "--dir", "/home/user/app/UCE/", "--model_loc", "minwoosun/uce-100m"] # command = ["python", script_name] + args dir_path = '/home/user/app/UCE/' model_loc = 'minwoosun/uce-100m' print(input_file_path) print(dir_path) print(model_loc) # # Verify adata_path is not None # if input_file_path is None or not os.path.exists(input_file_path): # raise ValueError(f"Invalid adata_path: {input_file_path}. Please check if the file exists.") # Construct the command command = [ 'python', '/home/user/app/UCE/eval_single_anndata.py', '--adata_path', input_file_path, '--dir', dir_path, '--model_loc', model_loc ] # Print the command for debugging print("Running command:", command) print("---> RUNNING UCE") result = subprocess.run(command, capture_output=True, text=True, check=True) print(result.stdout) print(result.stderr) print("---> FINSIH UCE") ################################ # Cell-type classification # ################################ # Set output file path file_name_with_ext = os.path.basename(input_file_path) file_name = os.path.splitext(file_name_with_ext)[0] pred_file = "/home/user/app/UCE/" + f"{file_name}_predictions.csv" model_path = hf_hub_download(repo_id="minwoosun/uce-misc", filename="tabula_sapiens_v1_logistic_regression_model_weights.json") file_name_with_ext = os.path.basename(input_file_path) file_name = os.path.splitext(file_name_with_ext)[0] output_file = "/home/user/app/UCE/" + f"{file_name}_uce_adata.h5ad" adata = sc.read_h5ad(output_file) x = adata.obsm['X_uce'] y_pred = load_and_predict_with_classifier(x, model_path, pred_file, save=True) ############## # UMAP # ############## UMAP = True if (UMAP): # # Set output file path # file_name_with_ext = os.path.basename(input_file_path) # file_name = os.path.splitext(file_name_with_ext)[0] # output_file = "/home/user/app/UCE/" + f"{file_name}_uce_adata.h5ad" # adata = sc.read_h5ad(output_file) labels = pd.Categorical(adata.obs["cell_type"]) reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2, random_state=42) embedding = reducer.fit_transform(adata.obsm["X_uce"]) plt.figure(figsize=(10, 8)) # Create the scatter plot scatter = plt.scatter(embedding[:, 0], embedding[:, 1], c=labels.codes, cmap='Set1', s=50, alpha=0.6) # Create a legend handles = [] for i, cell_type in enumerate(labels.categories): handles.append(plt.Line2D([0], [0], marker='o', color='w', label=cell_type, markerfacecolor=plt.cm.Set1(i / len(labels.categories)), markersize=10)) plt.legend(handles=handles, title='Cell Type') plt.title('UMAP projection of the data') plt.xlabel('UMAP1') plt.ylabel('UMAP2') # Save plot to a BytesIO object buf = BytesIO() plt.savefig(buf, format='png') buf.seek(0) # Read the image from BytesIO object img = plt.imread(buf, format='png') else: img = None print("no image") return img, output_file, pred_file if __name__ == "__main__": css = """ body {background-color: white; color: black;} .gradio-container {background-color: white; color: black;} .gr-file, .gr-image {background-color: #f0f0f0; color: black; border-color: black;} """ with gr.Blocks(css=css) as demo: gr.Markdown( '''