{ "cells": [ { "cell_type": "markdown", "id": "1107981a-6197-4184-8327-03ed05b31a5e", "metadata": {}, "source": [ "# II. BACKGROUND SUBTRACTION NOTEBOOK" ] }, { "cell_type": "raw", "id": "a0c470f4-c306-4b84-8aa5-d8b3daf9c810", "metadata": {}, "source": [ "10/01/24\n", "Modifications by Zoé Gerber\n", "based on original code by Marilyne Labrie" ] }, { "cell_type": "raw", "id": "d53c8eb3-f434-4f16-bbd4-4f757747b501", "metadata": {}, "source": [ "II.1. PACKAGES IMPORT\n", "II.2. DIRECTORIES\n", "II.3. FILES\n", " II.3.1. METADATA\n", " II.3.2. NOT_INTENSITIES\n", " II.3.3. FULL_TO_SHORT_COLUMN_NAMES\n", " II.3.4. SHORT_TO_FULL_COLUMN_NAMES\n", " II.3.5. SAMPLES COLORS\n", " II.3.6. CHANNELS COLORS\n", " II.3.7. ROUNDS COLORS\n", " II.3.8. DATA\n", "II.4. FILTERING\n", "II.5. CELL TYPES COLORS\n", "II.6. CELL SUBTYPES COLORS\n", "\n", "II.7. BACKGROUND SUBTRACTION\n", "II.8. SAVE" ] }, { "cell_type": "markdown", "id": "125cf03e-b740-4daa-9b16-21057959faee", "metadata": {}, "source": [ "## II.1. 
PACKAGES IMPORT" ] }, { "cell_type": "code", "execution_count": 27, "id": "b4faaea6-5510-44e5-9e8d-b9160dc4b3b5", "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import random\n", "import re\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sb\n", "import matplotlib.pyplot as plt\n", "import matplotlib.colors as mplc\n", "import subprocess\n", "import warnings\n", "\n", "from scipy import signal\n", "\n", "import plotly.figure_factory as ff\n", "import plotly\n", "import plotly.graph_objs as go\n", "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot \n", "import plotly.express as px\n", "init_notebook_mode(connected = True)\n", "\n", "from my_modules import *" ] }, { "cell_type": "code", "execution_count": 28, "id": "41c1e319-4dfb-43a2-b38b-83b789498988", "metadata": {}, "outputs": [], "source": [ "#Silence FutureWarnings & UserWarnings\n", "warnings.filterwarnings('ignore', category= FutureWarning)\n", "warnings.filterwarnings('ignore', category= UserWarning)" ] }, { "cell_type": "markdown", "id": "7946a507-14f5-4dd0-b2f7-a8b1ced9c3df", "metadata": {}, "source": [ "## II.2. 
*DIRECTORIES" ] }, { "cell_type": "code", "execution_count": 35, "id": "108f1f6d-4cd5-495f-91b7-b826f3d1f772", "metadata": {}, "outputs": [], "source": [ "# Set base directory\n", "\n", "##### MAC WORKSTATION #####\n", "#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'\n", "###########################\n", "\n", "##### WINDOWS WORKSTATION #####\n", "#base_dir = r'C:\\Users\\LaboLabrie\\gerz2701\\cyCIF-pipeline\\Set_B'\n", "###############################\n", "\n", "##### LOCAL WORKSTATION #####\n", "base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'\n", "#############################\n", "\n", "#set_name = 'Set_A'\n", "set_name = 'test'" ] }, { "cell_type": "raw", "id": "aa48c54f-12f6-4f27-bb71-edcae686bb2b", "metadata": {}, "source": [ "The project is organized as :\n", "main dir \n", " code\n", " proj_data > all csv files\n", " proj_metadata > exposure time csv file, images dir,...\n", " proj_qc_eda > csv after the QC/EDA step\n", " proj_bs > csv after the BS step" ] }, { "cell_type": "code", "execution_count": 36, "id": "a64af03a-7a84-4121-8eaa-7f08e6b3b21e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/ directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/images directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images directory already exists !\n" ] } ], "source": [ "project_name = set_name # Project 
name\n",
 "step_suffix = 'bs' # Current part (here part II)\n",
 "previous_step_suffix_long = \"_qc_eda\" # Previous part (here QC/EDA NOTEBOOK)\n",
 "\n",
 "# Initial input data directory\n",
 "input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long) \n",
 "\n",
 "# BS output directories\n",
 "output_data_dir = os.path.join(base_dir, project_name + \"_\" + step_suffix)\n",
 "# BS images subdirectory\n",
 "output_images_dir = os.path.join(output_data_dir,\"images\")\n",
 "\n",
 "# Data and Metadata directories\n",
 "# Metadata directories\n",
 "metadata_dir = os.path.join(base_dir, project_name + \"_metadata\")\n",
 "# images subdirectory\n",
 "metadata_images_dir = os.path.join(metadata_dir,\"images\")\n",
 "\n",
 "# Create directories if they don't already exist\n",
 "for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:\n",
 "    if not os.path.exists(d):\n",
 "        print(\"Creation of the\" , d, \"directory...\")\n",
 "        # exist_ok: do not fail if the directory appears between the check and the call\n",
 "        os.makedirs(d, exist_ok=True)\n",
 "    else :\n",
 "        print(\"The\", d, \"directory already exists !\")\n",
 "\n",
 "# From here on, relative paths resolve against the QC/EDA output directory\n",
 "os.chdir(input_data_dir)"
] }, { "cell_type": "code", "execution_count": 37, "id": "3396590c-e964-4053-be52-ef079e2d8e46", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "base_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/\n", "input_data_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda\n", "output_data_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs\n", "output_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/images\n", "metadata_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata\n", "metadata_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images\n" ] } ], "source": [ "# Verify paths\n", "print('base_dir :', base_dir)\n", 
"print('input_data_dir :', input_data_dir)\n", "print('output_data_dir :', output_data_dir)\n", "print('output_images_dir :', output_images_dir)\n", "print('metadata_dir :', metadata_dir)\n", "print('metadata_images_dir :', metadata_images_dir)" ] }, { "cell_type": "markdown", "id": "8fce1113-2492-49a1-bb79-2553cb4a4fcd", "metadata": {}, "source": [ "## II.3. FILES" ] }, { "cell_type": "raw", "id": "41524daf-bcaa-4407-96aa-7a11a2dff993", "metadata": {}, "source": [ "Don't forget to put your data in the projname_data directory !" ] }, { "cell_type": "markdown", "id": "3d6665ba-cb34-4e75-bb77-085888c8af8b", "metadata": {}, "source": [ "### II.3.1. METADATA" ] }, { "cell_type": "code", "execution_count": 38, "id": "558e4ac8-3fd8-45fb-acdc-803baaf8a8a5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/marker_intensity_metadata.csv file was imported for further analysis!\n", "WARNING: 'Marker metadata file' has the following unexpected item(s): \n", "['Exp']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lowerfull_columnmarkerlocalisation
0R0AF488300c2af488AF488_Cell_Intensity_AverageAF488cell
1R0AF488300c2af488AF488_Cytoplasm_Intensity_AverageAF488cytoplasm
2R0AF488300c2af488AF488_Nucleus_Intensity_AverageAF488nucleus
3R0AF5551500c3af555AF555_Cell_Intensity_AverageAF555cell
4R0AF5551500c3af555AF555_Cytoplasm_Intensity_AverageAF555cytoplasm
\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower full_column \\\n", "0 R0 AF488 300 c2 af488 AF488_Cell_Intensity_Average \n", "1 R0 AF488 300 c2 af488 AF488_Cytoplasm_Intensity_Average \n", "2 R0 AF488 300 c2 af488 AF488_Nucleus_Intensity_Average \n", "3 R0 AF555 1500 c3 af555 AF555_Cell_Intensity_Average \n", "4 R0 AF555 1500 c3 af555 AF555_Cytoplasm_Intensity_Average \n", "\n", " marker localisation \n", "0 AF488 cell \n", "1 AF488 cytoplasm \n", "2 AF488 nucleus \n", "3 AF555 cell \n", "4 AF555 cytoplasm " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Import all metadata we need from the QC/EDA chapter\n", "\n", "# METADATA\n", "filename = \"marker_intensity_metadata.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "\n", "# Check file exists\n", "if not os.path.exists(filename):\n", " print(\"WARNING: Could not find desired file: \"+filename)\n", "else :\n", " print(\"The\",filename,\"file was imported for further analysis!\")\n", " \n", "# Open, read in information\n", "metadata = pd.read_csv(filename)\n", "\n", "# Verify size with verify_line_no() function in my_modules.py\n", "#verify_line_no(filename, metadata.shape[0] + 1)\n", "\n", "# Verify headers\n", "exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']\n", "compare_headers(exp_cols, metadata.columns.values, \"Marker metadata file\")\n", "\n", "metadata = metadata.dropna()\n", "metadata.head()" ] }, { "cell_type": "markdown", "id": "85e0a921-eeb1-4ece-8252-1df8325bc883", "metadata": {}, "source": [ "### II.3.2. 
NOT_INTENSITIES" ] }, { "cell_type": "code", "execution_count": 39, "id": "f5965d04-1254-45d8-be20-f207068d25c4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/not_intensities.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "not_intensities =\n", " ['Cytoplasm_Size', 'Nuc_X', 'Primary_chem(1)_vs_surg(0)', 'cluster', 'immune_checkpoint', 'Sample_ID', 'Nucleus_Roundness', 'Unique_ROI_index', 'Nuc_Y', 'Nuc_X_Inv', 'Cell_ID', 'cell_subtype', 'ID', 'Nuc_Y_Inv', 'Patient', 'replicate_ID', 'cell_type', 'ROI_index', 'Cell_Size', 'Nucleus_Size']\n" ] } ], "source": [
 "# NOT_INTENSITIES\n",
 "filename = \"not_intensities.csv\"\n",
 "filename = os.path.join(metadata_dir, filename)\n",
 "\n",
 "# Check file exists\n",
 "if not os.path.exists(filename):\n",
 "    print(\"WARNING: Could not find desired file: \"+filename)\n",
 "else :\n",
 "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
 "\n",
 "# Open, read in information: the file holds one column name per line.\n",
 "# Blank lines (and an empty file) are skipped so that no '' entry ends up\n",
 "# in the list; surrounding whitespace is stripped from each name.\n",
 "with open(filename, 'r') as fh:\n",
 "    not_intensities = [line.strip() for line in fh if line.strip()]\n",
 "\n",
 "# Verify size\n",
 "print(\"Verifying data read from file is the correct length...\\n\")\n",
 "#verify_line_no(filename, len(not_intensities))\n",
 "\n",
 "# Print to console\n",
 "print(\"not_intensities =\\n\", not_intensities)"
] }, { "cell_type": "markdown", "id": "4568f560-eca8-4010-8d61-ef585d0bc5b3", "metadata": {}, "source": [ "### II.3.3. 
FULL_TO_SHORT_COLUMN_NAMES" ] }, { "cell_type": "code", "execution_count": 40, "id": "4fccb7b8-27e0-47e3-8b86-392ac2dfed8d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/full_to_short_column_names.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "full_to_short_names =\n", " {'AF488_Cell_Intensity_Average': 'AF488_Cell', 'AF488_Cytoplasm_Intensity_Average': 'AF488_Cytoplasm', 'AF488_Nucleus_Intensity_Average': 'AF488_Nucleus', 'AF555_Cell_Intensity_Average': 'AF555_Cell', 'AF555_Cytoplasm_Intensity_Average': 'AF555_Cytoplasm', 'AF555_Nucleus_Intensity_Average': 'AF555_Nucleus', 'AF647_Cell_Intensity_Average': 'AF647_Cell', 'AF647_Cytoplasm_Intensity_Average': 'AF647_Cytoplasm', 'AF647_Nucleus_Intensity_Average': 'AF647_Nucleus', 'AF750_Cell_Intensity_Average': 'AF750_Cell', 'AF750_Cytoplasm_Intensity_Average': 'AF750_Cytoplasm', 'AF750_Nucleus_Intensity_Average': 'AF750_Nucleus', 'aSMA_Cell_Intensity_Average': 'aSMA_Cell', 'aSMA_Cytoplasm_Intensity_Average': 'aSMA_Cytoplasm', 'aSMA_Nucleus_Intensity_Average': 'aSMA_Nucleus', 'AXL_Cell_Intensity_Average': 'AXL_Cell', 'AXL_Cytoplasm_Intensity_Average': 'AXL_Cytoplasm', 'AXL_Nucleus_Intensity_Average': 'AXL_Nucleus', 'B7H4_Cell_Intensity_Average': 'B7H4_Cell', 'B7H4_Cytoplasm_Intensity_Average': 'B7H4_Cytoplasm', 'B7H4_Nucleus_Intensity_Average': 'B7H4_Nucleus', 'CA9_Cell_Intensity_Average': 'CA9_Cell', 'CA9_Cytoplasm_Intensity_Average': 'CA9_Cytoplasm', 'CA9_Nucleus_Intensity_Average': 'CA9_Nucleus', 'CD4_Cell_Intensity_Average': 'CD4_Cell', 'CD4_Cytoplasm_Intensity_Average': 'CD4_Cytoplasm', 'CD4_Nucleus_Intensity_Average': 'CD4_Nucleus', 'CD8_Cell_Intensity_Average': 'CD8_Cell', 'CD8_Cytoplasm_Intensity_Average': 'CD8_Cytoplasm', 'CD8_Nucleus_Intensity_Average': 'CD8_Nucleus', 'CD11b_Cell_Intensity_Average': 'CD11b_Cell', 
'CD11b_Cytoplasm_Intensity_Average': 'CD11b_Cytoplasm', 'CD11b_Nucleus_Intensity_Average': 'CD11b_Nucleus', 'CD11c_Cell_Intensity_Average': 'CD11c_Cell', 'CD11c_Cytoplasm_Intensity_Average': 'CD11c_Cytoplasm', 'CD11c_Nucleus_Intensity_Average': 'CD11c_Nucleus', 'CD20_Cell_Intensity_Average': 'CD20_Cell', 'CD20_Cytoplasm_Intensity_Average': 'CD20_Cytoplasm', 'CD20_Nucleus_Intensity_Average': 'CD20_Nucleus', 'CD31_Cell_Intensity_Average': 'CD31_Cell', 'CD31_Cytoplasm_Intensity_Average': 'CD31_Cytoplasm', 'CD31_Nucleus_Intensity_Average': 'CD31_Nucleus', 'CD44_Cell_Intensity_Average': 'CD44_Cell', 'CD44_Cytoplasm_Intensity_Average': 'CD44_Cytoplasm', 'CD44_Nucleus_Intensity_Average': 'CD44_Nucleus', 'CD45_Cell_Intensity_Average': 'CD45_Cell', 'CD45_Cytoplasm_Intensity_Average': 'CD45_Cytoplasm', 'CD45_Nucleus_Intensity_Average': 'CD45_Nucleus', 'CD68_Cell_Intensity_Average': 'CD68_Cell', 'CD68_Cytoplasm_Intensity_Average': 'CD68_Cytoplasm', 'CD68_Nucleus_Intensity_Average': 'CD68_Nucleus', 'CD163_Cell_Intensity_Average': 'CD163_Cell', 'CD163_Cytoplasm_Intensity_Average': 'CD163_Cytoplasm', 'CD163_Nucleus_Intensity_Average': 'CD163_Nucleus', 'CKs_Cell_Intensity_Average': 'CKs_Cell', 'CKs_Cytoplasm_Intensity_Average': 'CKs_Cytoplasm', 'CKs_Nucleus_Intensity_Average': 'CKs_Nucleus', 'ColVI_Cell_Intensity_Average': 'ColVI_Cell', 'ColVI_Cytoplasm_Intensity_Average': 'ColVI_Cytoplasm', 'ColVI_Nucleus_Intensity_Average': 'ColVI_Nucleus', 'Desmin_Cell_Intensity_Average': 'Desmin_Cell', 'Desmin_Cytoplasm_Intensity_Average': 'Desmin_Cytoplasm', 'Desmin_Nucleus_Intensity_Average': 'Desmin_Nucleus', 'Ecad_Cell_Intensity_Average': 'Ecad_Cell', 'Ecad_Cytoplasm_Intensity_Average': 'Ecad_Cytoplasm', 'Ecad_Nucleus_Intensity_Average': 'Ecad_Nucleus', 'Fibronectin_Cell_Intensity_Average': 'Fibronectin_Cell', 'Fibronectin_Cytoplasm_Intensity_Average': 'Fibronectin_Cytoplasm', 'Fibronectin_Nucleus_Intensity_Average': 'Fibronectin_Nucleus', 'FOXP3_Cell_Intensity_Average': 'FOXP3_Cell', 
'FOXP3_Cytoplasm_Intensity_Average': 'FOXP3_Cytoplasm', 'FOXP3_Nucleus_Intensity_Average': 'FOXP3_Nucleus', 'GATA3_Cell_Intensity_Average': 'GATA3_Cell', 'GATA3_Cytoplasm_Intensity_Average': 'GATA3_Cytoplasm', 'GATA3_Nucleus_Intensity_Average': 'GATA3_Nucleus', 'HLA_Cell_Intensity_Average': 'HLA_Cell', 'HLA_Cytoplasm_Intensity_Average': 'HLA_Cytoplasm', 'HLA_Nucleus_Intensity_Average': 'HLA_Nucleus', 'Ki67_Cell_Intensity_Average': 'Ki67_Cell', 'Ki67_Cytoplasm_Intensity_Average': 'Ki67_Cytoplasm', 'Ki67_Nucleus_Intensity_Average': 'Ki67_Nucleus', 'MMP9_Cell_Intensity_Average': 'MMP9_Cell', 'MMP9_Cytoplasm_Intensity_Average': 'MMP9_Cytoplasm', 'MMP9_Nucleus_Intensity_Average': 'MMP9_Nucleus', 'PD1_Cell_Intensity_Average': 'PD1_Cell', 'PD1_Cytoplasm_Intensity_Average': 'PD1_Cytoplasm', 'PD1_Nucleus_Intensity_Average': 'PD1_Nucleus', 'PDGFR_Cell_Intensity_Average': 'PDGFR_Cell', 'PDGFR_Cytoplasm_Intensity_Average': 'PDGFR_Cytoplasm', 'PDGFR_Nucleus_Intensity_Average': 'PDGFR_Nucleus', 'PDL1_Cell_Intensity_Average': 'PDL1_Cell', 'PDL1_Cytoplasm_Intensity_Average': 'PDL1_Cytoplasm', 'PDL1_Nucleus_Intensity_Average': 'PDL1_Nucleus', 'r5c2_Cell_Intensity_Average': 'r5c2_Cell', 'r5c2_Cytoplasm_Intensity_Average': 'r5c2_Cytoplasm', 'r5c2_Nucleus_Intensity_Average': 'r5c2_Nucleus', 'r7c2_Cell_Intensity_Average': 'r7c2_Cell', 'r7c2_Cytoplasm_Intensity_Average': 'r7c2_Cytoplasm', 'r7c2_Nucleus_Intensity_Average': 'r7c2_Nucleus', 'r8c2_Cell_Intensity_Average': 'r8c2_Cell', 'r8c2_Cytoplasm_Intensity_Average': 'r8c2_Cytoplasm', 'r8c2_Nucleus_Intensity_Average': 'r8c2_Nucleus', 'Sting_Cell_Intensity_Average': 'Sting_Cell', 'Sting_Cytoplasm_Intensity_Average': 'Sting_Cytoplasm', 'Sting_Nucleus_Intensity_Average': 'Sting_Nucleus', 'Vimentin_Cell_Intensity_Average': 'Vimentin_Cell', 'Vimentin_Cytoplasm_Intensity_Average': 'Vimentin_Cytoplasm', 'Vimentin_Nucleus_Intensity_Average': 'Vimentin_Nucleus'}\n" ] } ], "source": [ "# FULL_TO_SHORT_COLUMN_NAMES\n", "filename = 
\"full_to_short_column_names.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "\n", "# Check file exists\n", "if not os.path.exists(filename):\n", " print(\"WARNING: Could not find desired file: \" + filename)\n", "else :\n", " print(\"The\",filename,\"file was imported for further analysis!\")\n", " \n", "# Open, read in information\n", "df = pd.read_csv(filename, header = 0)\n", "\n", "# Verify size\n", "print(\"Verifying data read from file is the correct length...\\n\")\n", "#verify_line_no(filename, df.shape[0] + 1)\n", "\n", "# Turn into dictionary\n", "full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]\n", "\n", "# Print information\n", "print('full_to_short_names =\\n',full_to_short_names)" ] }, { "cell_type": "markdown", "id": "f5858f81-2f63-44b7-abee-9c00ed6c7aba", "metadata": {}, "source": [ "### II.3.4. SHORT_TO_FULL_COLUMN_NAMES" ] }, { "cell_type": "code", "execution_count": 41, "id": "b47edf92-9401-4d16-b532-0156fb493c0c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/short_to_full_column_names.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "short_to_full_names =\n", " {'AF488_Cell': 'AF488_Cell_Intensity_Average', 'AF488_Cytoplasm': 'AF488_Cytoplasm_Intensity_Average', 'AF488_Nucleus': 'AF488_Nucleus_Intensity_Average', 'AF555_Cell': 'AF555_Cell_Intensity_Average', 'AF555_Cytoplasm': 'AF555_Cytoplasm_Intensity_Average', 'AF555_Nucleus': 'AF555_Nucleus_Intensity_Average', 'AF647_Cell': 'AF647_Cell_Intensity_Average', 'AF647_Cytoplasm': 'AF647_Cytoplasm_Intensity_Average', 'AF647_Nucleus': 'AF647_Nucleus_Intensity_Average', 'AF750_Cell': 'AF750_Cell_Intensity_Average', 'AF750_Cytoplasm': 'AF750_Cytoplasm_Intensity_Average', 'AF750_Nucleus': 'AF750_Nucleus_Intensity_Average', 'aSMA_Cell': 'aSMA_Cell_Intensity_Average', 
'aSMA_Cytoplasm': 'aSMA_Cytoplasm_Intensity_Average', 'aSMA_Nucleus': 'aSMA_Nucleus_Intensity_Average', 'AXL_Cell': 'AXL_Cell_Intensity_Average', 'AXL_Cytoplasm': 'AXL_Cytoplasm_Intensity_Average', 'AXL_Nucleus': 'AXL_Nucleus_Intensity_Average', 'B7H4_Cell': 'B7H4_Cell_Intensity_Average', 'B7H4_Cytoplasm': 'B7H4_Cytoplasm_Intensity_Average', 'B7H4_Nucleus': 'B7H4_Nucleus_Intensity_Average', 'CA9_Cell': 'CA9_Cell_Intensity_Average', 'CA9_Cytoplasm': 'CA9_Cytoplasm_Intensity_Average', 'CA9_Nucleus': 'CA9_Nucleus_Intensity_Average', 'CD4_Cell': 'CD4_Cell_Intensity_Average', 'CD4_Cytoplasm': 'CD4_Cytoplasm_Intensity_Average', 'CD4_Nucleus': 'CD4_Nucleus_Intensity_Average', 'CD8_Cell': 'CD8_Cell_Intensity_Average', 'CD8_Cytoplasm': 'CD8_Cytoplasm_Intensity_Average', 'CD8_Nucleus': 'CD8_Nucleus_Intensity_Average', 'CD11b_Cell': 'CD11b_Cell_Intensity_Average', 'CD11b_Cytoplasm': 'CD11b_Cytoplasm_Intensity_Average', 'CD11b_Nucleus': 'CD11b_Nucleus_Intensity_Average', 'CD11c_Cell': 'CD11c_Cell_Intensity_Average', 'CD11c_Cytoplasm': 'CD11c_Cytoplasm_Intensity_Average', 'CD11c_Nucleus': 'CD11c_Nucleus_Intensity_Average', 'CD20_Cell': 'CD20_Cell_Intensity_Average', 'CD20_Cytoplasm': 'CD20_Cytoplasm_Intensity_Average', 'CD20_Nucleus': 'CD20_Nucleus_Intensity_Average', 'CD31_Cell': 'CD31_Cell_Intensity_Average', 'CD31_Cytoplasm': 'CD31_Cytoplasm_Intensity_Average', 'CD31_Nucleus': 'CD31_Nucleus_Intensity_Average', 'CD44_Cell': 'CD44_Cell_Intensity_Average', 'CD44_Cytoplasm': 'CD44_Cytoplasm_Intensity_Average', 'CD44_Nucleus': 'CD44_Nucleus_Intensity_Average', 'CD45_Cell': 'CD45_Cell_Intensity_Average', 'CD45_Cytoplasm': 'CD45_Cytoplasm_Intensity_Average', 'CD45_Nucleus': 'CD45_Nucleus_Intensity_Average', 'CD68_Cell': 'CD68_Cell_Intensity_Average', 'CD68_Cytoplasm': 'CD68_Cytoplasm_Intensity_Average', 'CD68_Nucleus': 'CD68_Nucleus_Intensity_Average', 'CD163_Cell': 'CD163_Cell_Intensity_Average', 'CD163_Cytoplasm': 'CD163_Cytoplasm_Intensity_Average', 'CD163_Nucleus': 
'CD163_Nucleus_Intensity_Average', 'CKs_Cell': 'CKs_Cell_Intensity_Average', 'CKs_Cytoplasm': 'CKs_Cytoplasm_Intensity_Average', 'CKs_Nucleus': 'CKs_Nucleus_Intensity_Average', 'ColVI_Cell': 'ColVI_Cell_Intensity_Average', 'ColVI_Cytoplasm': 'ColVI_Cytoplasm_Intensity_Average', 'ColVI_Nucleus': 'ColVI_Nucleus_Intensity_Average', 'Desmin_Cell': 'Desmin_Cell_Intensity_Average', 'Desmin_Cytoplasm': 'Desmin_Cytoplasm_Intensity_Average', 'Desmin_Nucleus': 'Desmin_Nucleus_Intensity_Average', 'Ecad_Cell': 'Ecad_Cell_Intensity_Average', 'Ecad_Cytoplasm': 'Ecad_Cytoplasm_Intensity_Average', 'Ecad_Nucleus': 'Ecad_Nucleus_Intensity_Average', 'Fibronectin_Cell': 'Fibronectin_Cell_Intensity_Average', 'Fibronectin_Cytoplasm': 'Fibronectin_Cytoplasm_Intensity_Average', 'Fibronectin_Nucleus': 'Fibronectin_Nucleus_Intensity_Average', 'FOXP3_Cell': 'FOXP3_Cell_Intensity_Average', 'FOXP3_Cytoplasm': 'FOXP3_Cytoplasm_Intensity_Average', 'FOXP3_Nucleus': 'FOXP3_Nucleus_Intensity_Average', 'GATA3_Cell': 'GATA3_Cell_Intensity_Average', 'GATA3_Cytoplasm': 'GATA3_Cytoplasm_Intensity_Average', 'GATA3_Nucleus': 'GATA3_Nucleus_Intensity_Average', 'HLA_Cell': 'HLA_Cell_Intensity_Average', 'HLA_Cytoplasm': 'HLA_Cytoplasm_Intensity_Average', 'HLA_Nucleus': 'HLA_Nucleus_Intensity_Average', 'Ki67_Cell': 'Ki67_Cell_Intensity_Average', 'Ki67_Cytoplasm': 'Ki67_Cytoplasm_Intensity_Average', 'Ki67_Nucleus': 'Ki67_Nucleus_Intensity_Average', 'MMP9_Cell': 'MMP9_Cell_Intensity_Average', 'MMP9_Cytoplasm': 'MMP9_Cytoplasm_Intensity_Average', 'MMP9_Nucleus': 'MMP9_Nucleus_Intensity_Average', 'PD1_Cell': 'PD1_Cell_Intensity_Average', 'PD1_Cytoplasm': 'PD1_Cytoplasm_Intensity_Average', 'PD1_Nucleus': 'PD1_Nucleus_Intensity_Average', 'PDGFR_Cell': 'PDGFR_Cell_Intensity_Average', 'PDGFR_Cytoplasm': 'PDGFR_Cytoplasm_Intensity_Average', 'PDGFR_Nucleus': 'PDGFR_Nucleus_Intensity_Average', 'PDL1_Cell': 'PDL1_Cell_Intensity_Average', 'PDL1_Cytoplasm': 'PDL1_Cytoplasm_Intensity_Average', 'PDL1_Nucleus': 
'PDL1_Nucleus_Intensity_Average', 'r5c2_Cell': 'r5c2_Cell_Intensity_Average', 'r5c2_Cytoplasm': 'r5c2_Cytoplasm_Intensity_Average', 'r5c2_Nucleus': 'r5c2_Nucleus_Intensity_Average', 'r7c2_Cell': 'r7c2_Cell_Intensity_Average', 'r7c2_Cytoplasm': 'r7c2_Cytoplasm_Intensity_Average', 'r7c2_Nucleus': 'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell': 'r8c2_Cell_Intensity_Average', 'r8c2_Cytoplasm': 'r8c2_Cytoplasm_Intensity_Average', 'r8c2_Nucleus': 'r8c2_Nucleus_Intensity_Average', 'Sting_Cell': 'Sting_Cell_Intensity_Average', 'Sting_Cytoplasm': 'Sting_Cytoplasm_Intensity_Average', 'Sting_Nucleus': 'Sting_Nucleus_Intensity_Average', 'Vimentin_Cell': 'Vimentin_Cell_Intensity_Average', 'Vimentin_Cytoplasm': 'Vimentin_Cytoplasm_Intensity_Average', 'Vimentin_Nucleus': 'Vimentin_Nucleus_Intensity_Average'}\n" ] } ], "source": [ "# SHORT_TO_FULL_COLUMN_NAMES\n", "filename = \"short_to_full_column_names.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "\n", "# Check file exists\n", "if not os.path.exists(filename):\n", " print(\"WARNING: Could not find desired file: \" + filename)\n", "else :\n", " print(\"The\",filename,\"file was imported for further analysis!\")\n", "\n", "# Open, read in information\n", "df = pd.read_csv(filename, header = 0)\n", "\n", "# Verify size\n", "print(\"Verifying data read from file is the correct length...\\n\")\n", "#verify_line_no(filename, df.shape[0] + 1)\n", "\n", "# Turn into dictionary\n", "short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]\n", "\n", "# Print information\n", "print('short_to_full_names =\\n',short_to_full_names)" ] }, { "cell_type": "markdown", "id": "450d6f35-64de-47da-a359-7125a677cfd6", "metadata": {}, "source": [ "### II.3.5. 
SAMPLES COLORS" ] }, { "cell_type": "code", "execution_count": 42, "id": "4a72b8c0-bdc2-4d2a-8549-2b24cefba020", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/sample_color_data.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "sample_color_dict =\n", " {'DD3S1.csv': (0.9677975592919913, 0.44127456009157356, 0.5358103155058701), 'DD3S2.csv': (0.5920891529639701, 0.6418467016378244, 0.1935069134991043), 'DD3S3.csv': (0.21044753832183283, 0.6773105080456748, 0.6433941168468681), 'TMA.csv': (0.5019607843137255, 0.5019607843137255, 0.5019607843137255)}\n" ] } ], "source": [
 "# COLORS INFORMATION\n",
 "filename = \"sample_color_data.csv\"\n",
 "filename = os.path.join(metadata_dir, filename)\n",
 "\n",
 "# Check file exists\n",
 "if not os.path.exists(filename):\n",
 "    print(\"WARNING: Could not find desired file: \" + filename)\n",
 "else :\n",
 "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
 "\n",
 "# Open, read in information\n",
 "df = pd.read_csv(filename, header = 0)\n",
 "df = df.drop(columns = ['hex'])\n",
 "\n",
 "# our tuple of float values for rgb, (r, g, b) was read in\n",
 "# as a string '(r, g, b)'. rgb_tuple_from_str turns each string\n",
 "# back into a tuple of floats\n",
 "df['rgb'] = df['rgb'].map(rgb_tuple_from_str)\n",
 "\n",
 "# Verify size\n",
 "print(\"Verifying data read from file is the correct length...\\n\")\n",
 "#verify_line_no(filename, df.shape[0] + 1)\n",
 "\n",
 "# Turn into dictionary {Sample_ID: (r, g, b)}.\n",
 "# The former .T.to_dict('rgb')[0] relied on pandas expanding any orient\n",
 "# starting with 'r' to 'records' (deprecated, removed in pandas 2.0).\n",
 "sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()\n",
 "\n",
 "# Print information\n",
 "print('sample_color_dict =\\n',sample_color_dict)"
] }, { "cell_type": "markdown", "id": "f6249335-5aef-4d35-9c80-163d68ae1432", "metadata": {}, "source": [ "### II.3.6. 
CHANNELS COLORS" ] }, { "cell_type": "code", "execution_count": 43, "id": "e460924e-82b4-4542-b297-399cd8fd6e68", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/channel_color_data.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "channel_color_dict =\n", " {'c2': (0.00784313725490196, 0.24313725490196078, 1.0), 'c3': (1.0, 0.48627450980392156, 0.0), 'c4': (0.10196078431372549, 0.788235294117647, 0.2196078431372549), 'c5': (0.9098039215686274, 0.0, 0.043137254901960784)}\n" ] } ], "source": [
 "# CHANNELS\n",
 "filename = \"channel_color_data.csv\"\n",
 "filename = os.path.join(metadata_dir, filename)\n",
 "\n",
 "# Check file exists\n",
 "if not os.path.exists(filename):\n",
 "    print(\"WARNING: Could not find desired file: \"+filename)\n",
 "else :\n",
 "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
 "\n",
 "# Open, read in information\n",
 "df = pd.read_csv(filename, header = 0)\n",
 "df = df.drop(columns = ['hex'])\n",
 "\n",
 "# our tuple of float values for rgb, (r, g, b) was read in\n",
 "# as a string '(r, g, b)'. rgb_tuple_from_str turns each string\n",
 "# back into a tuple of floats\n",
 "df['rgb'] = df['rgb'].map(rgb_tuple_from_str)\n",
 "\n",
 "# Verify size\n",
 "print(\"Verifying data read from file is the correct length...\\n\")\n",
 "#verify_line_no(filename, df.shape[0] + 1)\n",
 "\n",
 "# Turn into dictionary {Channel: (r, g, b)}.\n",
 "# The former .T.to_dict('rgb')[0] relied on pandas expanding any orient\n",
 "# starting with 'r' to 'records' (deprecated, removed in pandas 2.0).\n",
 "channel_color_dict = df.set_index('Channel')['rgb'].to_dict()\n",
 "\n",
 "# Print information\n",
 "print('channel_color_dict =\\n',channel_color_dict)"
] }, { "cell_type": "markdown", "id": "051455b8-048b-4392-b5b5-9145d03391a8", "metadata": {}, "source": [ "### II.3.7. 
ROUNDS COLORS" ] }, { "cell_type": "code", "execution_count": 44, "id": "636ec133-d4e8-4f61-9893-f369d1538c83", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/round_color_data.csv file was imported for further analysis!\n", "Verifying data read from file is the correct length...\n", "\n", "round_color_dict =\n", " {'R0': (0.28685356234627135, 0.13009829239513535, 0.23110332132624437), 'R1': (0.36541462435986094, 0.2025447048359916, 0.37693310021636883), 'R2': (0.40867533458903105, 0.2940761173840091, 0.5166711878800253), 'R3': (0.42890613750051265, 0.4082290173220481, 0.6335348887063806), 'R4': (0.4444462906865238, 0.5264664993764805, 0.7056321892616532), 'R5': (0.47707206309601013, 0.6427061780374552, 0.7418477948908153), 'R6': (0.5414454866716836, 0.7466759172596551, 0.7572905778378964), 'R7': (0.6414710091647722, 0.8321551072276492, 0.7746773027952071), 'R8': (0.7684256891219349, 0.8992667116749021, 0.8171383269422353)}\n" ] } ], "source": [
 "# ROUND\n",
 "filename = \"round_color_data.csv\"\n",
 "filename = os.path.join(metadata_dir, filename)\n",
 "\n",
 "# Check file exists\n",
 "if not os.path.exists(filename):\n",
 "    print(\"WARNING: Could not find desired file: \"+filename)\n",
 "else :\n",
 "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
 "\n",
 "# Open, read in information\n",
 "df = pd.read_csv(filename, header = 0)\n",
 "df = df.drop(columns = ['hex'])\n",
 "\n",
 "# our tuple of float values for rgb, (r, g, b) was read in\n",
 "# as a string '(r, g, b)'. rgb_tuple_from_str turns each string\n",
 "# back into a tuple of floats\n",
 "df['rgb'] = df['rgb'].map(rgb_tuple_from_str)\n",
 "\n",
 "# Verify size\n",
 "print(\"Verifying data read from file is the correct length...\\n\")\n",
 "#verify_line_no(filename, df.shape[0] + 1)\n",
 "\n",
 "# Turn into dictionary {Round: (r, g, b)}.\n",
 "# The former .T.to_dict('rgb')[0] relied on pandas expanding any orient\n",
 "# starting with 'r' to 'records' (deprecated, removed in pandas 2.0).\n",
 "round_color_dict = df.set_index('Round')['rgb'].to_dict()\n",
 "\n",
 "# Print information\n",
 "print('round_color_dict =\\n',round_color_dict)"
] }, { "cell_type": "markdown", "id": "05ad70c3-4525-4e98-ac85-2398bde990ab", "metadata": {}, "source": [ "### II.3.8. DATA" ] }, { "cell_type": "code", "execution_count": 45, "id": "eb2392ef-dd07-44cb-b94e-0a044abd0a47", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The following CSV files were detected:\n", "['DD3S3_qc_eda.csv', 'TMA_qc_eda.csv', 'DD3S1_qc_eda.csv', 'DD3S2_qc_eda.csv']\n" ] } ], "source": [ "# DATA\n", "# List files in the directory\n", "# Check if the directory exists\n", "if os.path.exists(input_data_dir):\n", " ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(\"_qc_eda.csv\")]\n", "\n", " print(\"The following CSV files were detected:\")\n", " print([sample for sample in ls_samples])\n", "else:\n", " print(f\"The directory {input_data_dir} does not exist.\")" ] }, { "cell_type": "code", "execution_count": 46, "id": "99e5b1af-1b1a-432e-8e75-e31e0c70e7c8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Nuc_X' 'Sample_ID' 'Nucleus_Roundness' 'Nuc_Y_Inv' 'ROI_index'\n", " 'Cell_Size' 'Nucleus_Size' 'AF488_Cell_Intensity_Average'\n", " 'AF488_Cytoplasm_Intensity_Average' 'AF488_Nucleus_Intensity_Average'\n", " 'AF555_Cell_Intensity_Average' 'AF555_Cytoplasm_Intensity_Average'\n", " 'AF555_Nucleus_Intensity_Average' 'AF647_Cell_Intensity_Average'\n", " 'AF647_Cytoplasm_Intensity_Average' 'AF647_Nucleus_Intensity_Average'\n", " 
'AF750_Cell_Intensity_Average' 'AF750_Cytoplasm_Intensity_Average'\n", " 'AF750_Nucleus_Intensity_Average' 'aSMA_Cell_Intensity_Average'\n", " 'aSMA_Cytoplasm_Intensity_Average' 'aSMA_Nucleus_Intensity_Average'\n", " 'AXL_Cell_Intensity_Average' 'AXL_Cytoplasm_Intensity_Average'\n", " 'AXL_Nucleus_Intensity_Average' 'B7H4_Cell_Intensity_Average'\n", " 'B7H4_Cytoplasm_Intensity_Average' 'B7H4_Nucleus_Intensity_Average'\n", " 'CA9_Cell_Intensity_Average' 'CA9_Cytoplasm_Intensity_Average'\n", " 'CA9_Nucleus_Intensity_Average' 'CD4_Cell_Intensity_Average'\n", " 'CD4_Cytoplasm_Intensity_Average' 'CD4_Nucleus_Intensity_Average'\n", " 'CD8_Cell_Intensity_Average' 'CD8_Cytoplasm_Intensity_Average'\n", " 'CD8_Nucleus_Intensity_Average' 'CD11b_Cell_Intensity_Average'\n", " 'CD11b_Cytoplasm_Intensity_Average' 'CD11b_Nucleus_Intensity_Average'\n", " 'CD11c_Cell_Intensity_Average' 'CD11c_Cytoplasm_Intensity_Average'\n", " 'CD11c_Nucleus_Intensity_Average' 'CD20_Cell_Intensity_Average'\n", " 'CD20_Cytoplasm_Intensity_Average' 'CD20_Nucleus_Intensity_Average'\n", " 'CD31_Cell_Intensity_Average' 'CD31_Cytoplasm_Intensity_Average'\n", " 'CD31_Nucleus_Intensity_Average' 'CD44_Cell_Intensity_Average'\n", " 'CD44_Cytoplasm_Intensity_Average' 'CD44_Nucleus_Intensity_Average'\n", " 'CD45_Cell_Intensity_Average' 'CD45_Cytoplasm_Intensity_Average'\n", " 'CD45_Nucleus_Intensity_Average' 'CD68_Cell_Intensity_Average'\n", " 'CD68_Cytoplasm_Intensity_Average' 'CD68_Nucleus_Intensity_Average'\n", " 'CD163_Cell_Intensity_Average' 'CD163_Cytoplasm_Intensity_Average'\n", " 'CD163_Nucleus_Intensity_Average' 'CKs_Cell_Intensity_Average'\n", " 'CKs_Cytoplasm_Intensity_Average' 'CKs_Nucleus_Intensity_Average'\n", " 'ColVI_Cell_Intensity_Average' 'ColVI_Cytoplasm_Intensity_Average'\n", " 'ColVI_Nucleus_Intensity_Average' 'Desmin_Cell_Intensity_Average'\n", " 'Desmin_Cytoplasm_Intensity_Average' 'Desmin_Nucleus_Intensity_Average'\n", " 'Ecad_Cell_Intensity_Average' 
# %%
# Import all the other files
# dfs maps each sample file name to its DataFrame (columns in reference order).
dfs = {}

# Set variable to hold default header values.
# The column layout of the first file in ls_samples is the reference; only its
# first row is read because the header is all that is needed here.
header_df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
expected_headers = header_df.columns.values
print(expected_headers)

###############################
# !! This may take a while !! #
###############################
# Iterate over a snapshot: samples are removed from ls_samples inside the loop,
# and removing from the list being iterated makes Python skip the next file.
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file
        sample_df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)
        # Skip storing anything: the previous version fell through and stored
        # the frame left over from the preceding iteration under this name.
        continue

    if sample_df.empty:
        # Header-only file: nothing to analyse, drop it as well
        print(f'\n{sample} file contains no rows. Removing from analysis...')
        ls_samples.remove(sample)
        continue

    # Reorder the columns to match the expected headers list
    # (columns missing from this file become NaN and are caught by the check below)
    dfs[sample] = sample_df.reindex(columns=expected_headers)
    print(sample, "file is processed !\n")

# %%
# Merge dfs into one df (the per-cell ID index is kept)
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
# Drop the per-sample dict so its frames can be garbage-collected.
# Re-running this cell alone therefore requires re-running the import cell first.
del dfs

print(df.head())

# %%
df.shape

# %%
# Check for NaN entries (should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isnull().any().any()

# %% [markdown]
# ## II.4. *FILTERING

# %%
print("Number of cells before filtering :", df.shape[0])

# %%
#print(df)

# %%
# Delete small cells and objects w/high AF555 Signal (RBCs)
# We usually use the 95th percentile calculated during QC_EDA
NUCLEUS_SIZE_MIN = 42             # keep nuclei strictly larger than this
NUCLEUS_SIZE_MAX = 216            # keep nuclei strictly smaller than this
AF555_CELL_INTENSITY_MAX = 2000   # keep cells strictly below this AF555 intensity

nucleus_size = df['Nucleus_Size']
df = df.loc[(nucleus_size > NUCLEUS_SIZE_MIN) & (nucleus_size < NUCLEUS_SIZE_MAX)]
print("Number of cells after filtering on nucleus size:", df.shape[0])

df = df.loc[df['AF555_Cell_Intensity_Average'] < AF555_CELL_INTENSITY_MAX]
print("Number of cells after filtering on AF555A ___ intensity:", df.shape[0])

# %%
# Assign cell type
# Placeholder for development: every cell gets the same label until a real
# classification is plugged in.
def assign_cell_type(n):
    """Return the placeholder cell type; the argument is currently ignored."""
    return 'none'

# The placeholder does not depend on its input, so it is called once and the
# label is broadcast to every row. The previous np.vectorize call over random
# integers made one Python call per cell and raised on a frame with no rows.
df['cell_type'] = assign_cell_type(None)
df['cell_subtype'] = df['cell_type'].copy()
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nuc_XSample_IDNucleus_RoundnessNuc_Y_InvROI_indexCell_SizeNucleus_SizeAF488_Cell_Intensity_AverageAF488_Cytoplasm_Intensity_AverageAF488_Nucleus_Intensity_Average...r8c2_Cytoplasm_Intensity_Averager8c2_Nucleus_Intensity_AverageSting_Cell_Intensity_AverageSting_Cytoplasm_Intensity_AverageSting_Nucleus_Intensity_AverageVimentin_Cell_Intensity_AverageVimentin_Cytoplasm_Intensity_AverageVimentin_Nucleus_Intensity_Averagecell_typecell_subtype
ID
DD3S3_Cell_0823.567871DD3S3.csv0.83532415699.3828120281811996.3487551883.1500242275.851807...257.945007294.6419681358.5622561318.8349611456.6542972862.2526862464.3349613844.765381nonenone
DD3S3_Cell_1852.840027DD3S3.csv0.52342115690.5332030200752523.1999512450.0878912645.053223...301.056000325.8133241856.0050051954.3759771692.0533453007.5349123135.4960942794.266602nonenone
DD3S3_Cell_2868.272705DD3S3.csv0.68614715682.99414104251652864.3459472765.0461433020.818115...339.603851364.8242491969.5529792171.3039551651.6424561955.1882322099.0769041728.454590nonenone
DD3S3_Cell_4704.337280DD3S3.csv0.75762315683.05957004181693349.0239263018.8835453835.443848...350.662659433.1479191398.4760741301.9317631540.7219241861.6004641678.6666262131.130127nonenone
DD3S3_Cell_5852.893799DD3S3.csv0.71465115683.01757802011132366.1342772088.7158202582.177002...276.897736326.5929261328.6020511144.6817631471.8319091457.7263181205.8522951653.876099nonenone
\n", "

5 rows × 117 columns

\n", "
" ], "text/plain": [ " Nuc_X Sample_ID Nucleus_Roundness Nuc_Y_Inv \\\n", "ID \n", "DD3S3_Cell_0 823.567871 DD3S3.csv 0.835324 15699.382812 \n", "DD3S3_Cell_1 852.840027 DD3S3.csv 0.523421 15690.533203 \n", "DD3S3_Cell_2 868.272705 DD3S3.csv 0.686147 15682.994141 \n", "DD3S3_Cell_4 704.337280 DD3S3.csv 0.757623 15683.059570 \n", "DD3S3_Cell_5 852.893799 DD3S3.csv 0.714651 15683.017578 \n", "\n", " ROI_index Cell_Size Nucleus_Size \\\n", "ID \n", "DD3S3_Cell_0 0 281 81 \n", "DD3S3_Cell_1 0 200 75 \n", "DD3S3_Cell_2 0 425 165 \n", "DD3S3_Cell_4 0 418 169 \n", "DD3S3_Cell_5 0 201 113 \n", "\n", " AF488_Cell_Intensity_Average AF488_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1996.348755 1883.150024 \n", "DD3S3_Cell_1 2523.199951 2450.087891 \n", "DD3S3_Cell_2 2864.345947 2765.046143 \n", "DD3S3_Cell_4 3349.023926 3018.883545 \n", "DD3S3_Cell_5 2366.134277 2088.715820 \n", "\n", " AF488_Nucleus_Intensity_Average ... \\\n", "ID ... \n", "DD3S3_Cell_0 2275.851807 ... \n", "DD3S3_Cell_1 2645.053223 ... \n", "DD3S3_Cell_2 3020.818115 ... \n", "DD3S3_Cell_4 3835.443848 ... \n", "DD3S3_Cell_5 2582.177002 ... 
\n", "\n", " r8c2_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 257.945007 \n", "DD3S3_Cell_1 301.056000 \n", "DD3S3_Cell_2 339.603851 \n", "DD3S3_Cell_4 350.662659 \n", "DD3S3_Cell_5 276.897736 \n", "\n", " r8c2_Nucleus_Intensity_Average Sting_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 294.641968 1358.562256 \n", "DD3S3_Cell_1 325.813324 1856.005005 \n", "DD3S3_Cell_2 364.824249 1969.552979 \n", "DD3S3_Cell_4 433.147919 1398.476074 \n", "DD3S3_Cell_5 326.592926 1328.602051 \n", "\n", " Sting_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1318.834961 \n", "DD3S3_Cell_1 1954.375977 \n", "DD3S3_Cell_2 2171.303955 \n", "DD3S3_Cell_4 1301.931763 \n", "DD3S3_Cell_5 1144.681763 \n", "\n", " Sting_Nucleus_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1456.654297 \n", "DD3S3_Cell_1 1692.053345 \n", "DD3S3_Cell_2 1651.642456 \n", "DD3S3_Cell_4 1540.721924 \n", "DD3S3_Cell_5 1471.831909 \n", "\n", " Vimentin_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 2862.252686 \n", "DD3S3_Cell_1 3007.534912 \n", "DD3S3_Cell_2 1955.188232 \n", "DD3S3_Cell_4 1861.600464 \n", "DD3S3_Cell_5 1457.726318 \n", "\n", " Vimentin_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 2464.334961 \n", "DD3S3_Cell_1 3135.496094 \n", "DD3S3_Cell_2 2099.076904 \n", "DD3S3_Cell_4 1678.666626 \n", "DD3S3_Cell_5 1205.852295 \n", "\n", " Vimentin_Nucleus_Intensity_Average cell_type cell_subtype \n", "ID \n", "DD3S3_Cell_0 3844.765381 none none \n", "DD3S3_Cell_1 2794.266602 none none \n", "DD3S3_Cell_2 1728.454590 none none \n", "DD3S3_Cell_4 2131.130127 none none \n", "DD3S3_Cell_5 1653.876099 none none \n", "\n", "[5 rows x 117 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "markdown", "id": "de4deb80-bdbe-430b-9991-019406d99ae2", "metadata": {}, "source": [ "## II.5. 
# %% [markdown]
# ## II.5. CELL TYPES COLORS

# %% [raw]
# # Establish colors to use throughout workflow
#
# # we want colors that are categorical, since Cell Type is a non-ordered category.
# # A categorical color palette will have dissimilar colors.
# # Get those unique colors
# cell_types = ['STROMA','CANCER','IMMUNE','ENDOTHELIAL']
# color_values = sb.color_palette("hls", n_colors = len(cell_types))
# # each color value is a tuple of three values: (R, G, B)
#
# print("Unique cell types are:",df.cell_type.unique())
# # Display those unique colors
# sb.palplot(sb.color_palette(color_values))

# %%
# Define your custom colors for each cell type; each value is an (R, G, B) tuple
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Cell types and their colors, in the same (insertion) order
cell_types = list(custom_colors.keys())
color_values = [custom_colors[cell] for cell in cell_types]

# Display the colors
sb.palplot(sb.color_palette(color_values))

# %%
# Store in a dictionary {cell type: (R, G, B)}
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict

# %%
# Save color information (mapping and legend) to metadata directory
# Create dataframe (color_dict_to_df comes from my_modules)
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")

# Save to file in metadata directory
filename = os.path.join(metadata_dir, "celltype_color_data.csv")
celltype_color_df.to_csv(filename, index=False)
# A space after "File" was missing, which printed "File/Users/..."
print("File " + filename + " was created!")

# %%
# Legend of cell type info only
# Zero-sized bars are drawn only to obtain one colored legend handle per cell type.
fig = plt.figure(figsize=(1, 1))
g = fig.add_subplot(111)
g.axis('off')
handles = [
    g.bar(0, 0, color=color, label=cell_type, linewidth=0)
    for cell_type, color in celltype_color_dict.items()
]
# No trailing comma here: it used to bind first_legend to a one-element tuple
first_legend = g.legend(handles=handles, loc='upper right', title='Cell type')

filename = os.path.join(metadata_images_dir, "Celltype_legend.png")
fig.savefig(filename, bbox_inches='tight')
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lowerfull_columnmarkerlocalisation
0R0AF488300c2af488AF488_Cell_Intensity_AverageAF488cell
1R0AF488300c2af488AF488_Cytoplasm_Intensity_AverageAF488cytoplasm
2R0AF488300c2af488AF488_Nucleus_Intensity_AverageAF488nucleus
3R0AF5551500c3af555AF555_Cell_Intensity_AverageAF555cell
4R0AF5551500c3af555AF555_Cytoplasm_Intensity_AverageAF555cytoplasm
...........................
103R8Sting1000c4stingSting_Cytoplasm_Intensity_AverageStingcytoplasm
104R8Sting1000c4stingSting_Nucleus_Intensity_AverageStingnucleus
105R8CD11b1500c5cd11bCD11b_Cell_Intensity_AverageCD11bcell
106R8CD11b1500c5cd11bCD11b_Cytoplasm_Intensity_AverageCD11bcytoplasm
107R8CD11b1500c5cd11bCD11b_Nucleus_Intensity_AverageCD11bnucleus
\n", "

108 rows × 8 columns

\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower \\\n", "0 R0 AF488 300 c2 af488 \n", "1 R0 AF488 300 c2 af488 \n", "2 R0 AF488 300 c2 af488 \n", "3 R0 AF555 1500 c3 af555 \n", "4 R0 AF555 1500 c3 af555 \n", ".. ... ... ... ... ... \n", "103 R8 Sting 1000 c4 sting \n", "104 R8 Sting 1000 c4 sting \n", "105 R8 CD11b 1500 c5 cd11b \n", "106 R8 CD11b 1500 c5 cd11b \n", "107 R8 CD11b 1500 c5 cd11b \n", "\n", " full_column marker localisation \n", "0 AF488_Cell_Intensity_Average AF488 cell \n", "1 AF488_Cytoplasm_Intensity_Average AF488 cytoplasm \n", "2 AF488_Nucleus_Intensity_Average AF488 nucleus \n", "3 AF555_Cell_Intensity_Average AF555 cell \n", "4 AF555_Cytoplasm_Intensity_Average AF555 cytoplasm \n", ".. ... ... ... \n", "103 Sting_Cytoplasm_Intensity_Average Sting cytoplasm \n", "104 Sting_Nucleus_Intensity_Average Sting nucleus \n", "105 CD11b_Cell_Intensity_Average CD11b cell \n", "106 CD11b_Cytoplasm_Intensity_Average CD11b cytoplasm \n", "107 CD11b_Nucleus_Intensity_Average CD11b nucleus \n", "\n", "[108 rows x 8 columns]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata" ] }, { "cell_type": "code", "execution_count": 60, "id": "a54e644d-00a8-409a-9f62-12e6cd30e669", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Nuc_X', 'Sample_ID', 'Nucleus_Roundness', 'Nuc_Y_Inv',\n", " 'ROI_index', 'Cell_Size', 'Nucleus_Size',\n", " 'AF488_Cell_Intensity_Average',\n", " 'AF488_Cytoplasm_Intensity_Average',\n", " 'AF488_Nucleus_Intensity_Average', 'AF555_Cell_Intensity_Average',\n", " 'AF555_Cytoplasm_Intensity_Average',\n", " 'AF555_Nucleus_Intensity_Average', 'AF647_Cell_Intensity_Average',\n", " 'AF647_Cytoplasm_Intensity_Average',\n", " 'AF647_Nucleus_Intensity_Average', 'AF750_Cell_Intensity_Average',\n", " 'AF750_Cytoplasm_Intensity_Average',\n", " 'AF750_Nucleus_Intensity_Average', 'aSMA_Cell_Intensity_Average',\n", " 'aSMA_Cytoplasm_Intensity_Average',\n", " 
'aSMA_Nucleus_Intensity_Average', 'AXL_Cell_Intensity_Average',\n", " 'AXL_Cytoplasm_Intensity_Average', 'AXL_Nucleus_Intensity_Average',\n", " 'B7H4_Cell_Intensity_Average', 'B7H4_Cytoplasm_Intensity_Average',\n", " 'B7H4_Nucleus_Intensity_Average', 'CA9_Cell_Intensity_Average',\n", " 'CA9_Cytoplasm_Intensity_Average', 'CA9_Nucleus_Intensity_Average',\n", " 'CD4_Cell_Intensity_Average', 'CD4_Cytoplasm_Intensity_Average',\n", " 'CD4_Nucleus_Intensity_Average', 'CD8_Cell_Intensity_Average',\n", " 'CD8_Cytoplasm_Intensity_Average', 'CD8_Nucleus_Intensity_Average',\n", " 'CD11b_Cell_Intensity_Average',\n", " 'CD11b_Cytoplasm_Intensity_Average',\n", " 'CD11b_Nucleus_Intensity_Average', 'CD11c_Cell_Intensity_Average',\n", " 'CD11c_Cytoplasm_Intensity_Average',\n", " 'CD11c_Nucleus_Intensity_Average', 'CD20_Cell_Intensity_Average',\n", " 'CD20_Cytoplasm_Intensity_Average',\n", " 'CD20_Nucleus_Intensity_Average', 'CD31_Cell_Intensity_Average',\n", " 'CD31_Cytoplasm_Intensity_Average',\n", " 'CD31_Nucleus_Intensity_Average', 'CD44_Cell_Intensity_Average',\n", " 'CD44_Cytoplasm_Intensity_Average',\n", " 'CD44_Nucleus_Intensity_Average', 'CD45_Cell_Intensity_Average',\n", " 'CD45_Cytoplasm_Intensity_Average',\n", " 'CD45_Nucleus_Intensity_Average', 'CD68_Cell_Intensity_Average',\n", " 'CD68_Cytoplasm_Intensity_Average',\n", " 'CD68_Nucleus_Intensity_Average', 'CD163_Cell_Intensity_Average',\n", " 'CD163_Cytoplasm_Intensity_Average',\n", " 'CD163_Nucleus_Intensity_Average', 'CKs_Cell_Intensity_Average',\n", " 'CKs_Cytoplasm_Intensity_Average', 'CKs_Nucleus_Intensity_Average',\n", " 'ColVI_Cell_Intensity_Average',\n", " 'ColVI_Cytoplasm_Intensity_Average',\n", " 'ColVI_Nucleus_Intensity_Average', 'Desmin_Cell_Intensity_Average',\n", " 'Desmin_Cytoplasm_Intensity_Average',\n", " 'Desmin_Nucleus_Intensity_Average', 'Ecad_Cell_Intensity_Average',\n", " 'Ecad_Cytoplasm_Intensity_Average',\n", " 'Ecad_Nucleus_Intensity_Average',\n", " 'Fibronectin_Cell_Intensity_Average',\n", " 
'Fibronectin_Cytoplasm_Intensity_Average',\n", " 'Fibronectin_Nucleus_Intensity_Average',\n", " 'FOXP3_Cell_Intensity_Average',\n", " 'FOXP3_Cytoplasm_Intensity_Average',\n", " 'FOXP3_Nucleus_Intensity_Average', 'GATA3_Cell_Intensity_Average',\n", " 'GATA3_Cytoplasm_Intensity_Average',\n", " 'GATA3_Nucleus_Intensity_Average', 'HLA_Cell_Intensity_Average',\n", " 'HLA_Cytoplasm_Intensity_Average', 'HLA_Nucleus_Intensity_Average',\n", " 'Ki67_Cell_Intensity_Average', 'Ki67_Cytoplasm_Intensity_Average',\n", " 'Ki67_Nucleus_Intensity_Average', 'MMP9_Cell_Intensity_Average',\n", " 'MMP9_Cytoplasm_Intensity_Average',\n", " 'MMP9_Nucleus_Intensity_Average', 'PD1_Cell_Intensity_Average',\n", " 'PD1_Cytoplasm_Intensity_Average', 'PD1_Nucleus_Intensity_Average',\n", " 'PDGFR_Cell_Intensity_Average',\n", " 'PDGFR_Cytoplasm_Intensity_Average',\n", " 'PDGFR_Nucleus_Intensity_Average', 'PDL1_Cell_Intensity_Average',\n", " 'PDL1_Cytoplasm_Intensity_Average',\n", " 'PDL1_Nucleus_Intensity_Average', 'r5c2_Cell_Intensity_Average',\n", " 'r5c2_Cytoplasm_Intensity_Average',\n", " 'r5c2_Nucleus_Intensity_Average', 'r7c2_Cell_Intensity_Average',\n", " 'r7c2_Cytoplasm_Intensity_Average',\n", " 'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell_Intensity_Average',\n", " 'r8c2_Cytoplasm_Intensity_Average',\n", " 'r8c2_Nucleus_Intensity_Average', 'Sting_Cell_Intensity_Average',\n", " 'Sting_Cytoplasm_Intensity_Average',\n", " 'Sting_Nucleus_Intensity_Average',\n", " 'Vimentin_Cell_Intensity_Average',\n", " 'Vimentin_Cytoplasm_Intensity_Average',\n", " 'Vimentin_Nucleus_Intensity_Average', 'cell_type', 'cell_subtype'],\n", " dtype=object)" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns.values" ] }, { "cell_type": "code", "execution_count": 61, "id": "23fc3f34-ae00-4e5b-9003-bbdb69442dc6", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(350554, 117)" ] }, "execution_count": 61, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "df.shape\n" ] }, { "cell_type": "code", "execution_count": 62, "id": "f0434951-c5b4-4bc3-bbb8-329ed86603bb", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(108, 8)" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata.shape" ] }, { "cell_type": "code", "execution_count": 63, "id": "8fdee906-95a9-4e44-8990-5ee8ab25458a", "metadata": {}, "outputs": [], "source": [ "# Normalise every intensity column (marker + localisation) by the exposure\n", "# setting ('Exp') listed for it in `metadata`.\n", "# NOTE(review): the result is written back into df in place, so re-running this\n", "# cell presumably applies the division a second time - reload df before re-executing.\n", "is_intensity = ~df.columns.isin(not_intensities)\n", "df.loc[:, is_intensity] = df.loc[:, is_intensity].apply(\n", "    lambda column: divide_exp_time(column, 'Exp', metadata), axis = 0)" ] }, { "cell_type": "markdown", "id": "23ac4db9-d5e4-47ca-a8d8-3250ee3a9a03", "metadata": {}, "source": [ "## II.6. *CELL SUBTYPES COLORS" ] }, { "cell_type": "code", "execution_count": 64, "id": "8f77d2ca-4282-4ffd-987d-9ab547b59b96", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unique cell types are: ['none']\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7YAAABlCAYAAAB5uH+EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAEpElEQVR4nO3cv25bdRzG4V+cgMkf21I2LHvKDXAl3APqwGVETKAihqpCYmCGEbEACxsIqRIbA1OGWLLEarcOTYkPQxUmlJxzQvTjbZ5nPoq+ejPEH9nOTtM0TQEAAIBQg9oHAAAAwF0IWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKLttXlou92W5XJZRqNR2dnZue+bAAAAeOCapinr9bpMp9MyGNz8nmyrsF0ul2U+n/8nxwEAAEBbi8WizGazG59pFbaj0aiUUsrT75+V/cOju1/2QHz0zW+1T4j06Qe/1z4hzic/f137hEg/Xn5Y+4Q4f3z8uPYJkd798v3aJ8QZ/PRF7RMi/fres9onxPnqyS+1T4j02Q9N7RPifPfo89onxNm8uiqPvj37p0dv0ipsrz9+vH94VA6Obv+hvDYYHtQ+IdLB6J3aJ8TZ3d+tfUKk8e5+7RPibG75GBD/bnw4rH1CnIHJejk68AZEV2/v+VvQx3gsbLs6eMvrtb7afB3WKxQAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACi7bV5qGmaUkopFy+e3+sxb5rty03tEyJt1n/WPiHO1cVV7RMirS4vap8QZ73d1j4h0urFy9onxBmYrJfnG6/Vurr8y9+CPlarpvYJcTavvF7r6nqz6x69yU7T4qmzs7NycnJy98sAAACgg8ViUWaz2Y3PtHrH9vj4uJRSyvn5eZlMJne/7IFYrVZlPp+XxWJRxuNx7XMi2Kwfu3Vns37s1p3N+rFbdzbrx27d2awfu3XXNE1Zr9dlOp3e+myrsB0MXn8VdzKZ+CX0MB6P7daRzfqxW3c268du3dmsH7t1Z7N+7NadzfqxWzdt31j1z6MAAACIJmwBAACI1ipsh8NhOT09LcPh8L7veaPYrTub9WO37mzWj926s1k/duvOZv3YrTub9WO3+9XqvyIDAADA/5WPIgMAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABDtbxF10D5wSqeJAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Establish the colors used for cell subtypes throughout the workflow\n", "\n", "# Cell subtype is a non-ordered category, so we want a categorical palette:\n", "# it provides dissimilar colors, one per subtype.\n", "cell_subtypes = ['DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',\n", "                 'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',\n", "                 'STROMA_OTHER', 'ENDOTHELIAL']\n", "color_values = sb.color_palette(\"Paired\", n_colors = len(cell_subtypes))\n", "# each color value is a tuple of three values: (R, G, B)\n", "\n", "# Show which subtypes are actually present in the data\n", "print(\"Unique cell subtypes are:\", df.cell_subtype.unique())\n", "# Display those unique colors\n", "sb.palplot(sb.color_palette(color_values))" ] }, { "cell_type": "code", "execution_count": 65, "id": "bf75f91b-2bc2-4d4f-bc94-861c6ccf27da", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),\n", " 'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),\n", " 'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),\n", " 'TCD8': (0.2, 0.6274509803921569, 0.17254901960784313),\n", " 'M1': (0.984313725490196, 0.6039215686274509, 0.6),\n", " 'M2': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),\n", " 'Treg': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),\n", " 'IMMUNE_OTHER': (1.0, 0.4980392156862745, 0.0),\n", " 'CANCER': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),\n", " 'αSMA_myCAF': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),\n", " 'STROMA_OTHER': (1.0, 1.0, 0.6),\n", " 'ENDOTHELIAL': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392)}" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Store in a dictionary: cell subtype name -> (R, G, B) color\n", "cellsubtype_color_dict = dict(zip(cell_subtypes, color_values))\n", "cellsubtype_color_dict" ] }, { "cell_type": "code", 
"execution_count": 66, "id": "b24374ef-ea1c-4910-a343-364fa7567497", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/cellsubtype_color_data.csv was created!\n" ] } ], "source": [ "# Save color information (mapping and legend) to metadata directory\n", "# Build a dataframe from the cell subtype -> color mapping\n", "cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, \"cell_subtype\")\n", "\n", "# Save to file in the metadata directory\n", "filename = os.path.join(metadata_dir, \"cellsubtype_color_data.csv\")\n", "cellsubtype_color_df.to_csv(filename, index = False)\n", "print(\"File \" + filename + \" was created!\")" ] }, { "cell_type": "code", "execution_count": 67, "id": "90df1f96-7934-42d9-948f-69fdbbc284df", "metadata": { "tags": [] }, "outputs": [ { "data": { "image/png": 
jh3D9evX0atXLwwZMgSvX7+u17b9/PxQXl6Oc+fO4fbt21i3bh00NTUlWsdff/2Fffv24c8//0R6ejpycnIwZ84cAMDixYvh4eGB4cOHg8fjgcfjoV+/fvD19UViYiLKy8uZ9ezatQuGhoYYPHgwMy0yMhJ2dnbIyclBYGAgFixYgJMnTzbY59GsEDGUlZWRvLw8UlZWJk7zRnflyhUCgKSkpNTa7vz586RVq1bk06dPAtO7dOlCtm3bRgghJDg4mNjZ2THzvL29yZgxY2pcp62tLQkJCRE5Ly4ujmhrawtMS01NJdU/9uDgYKKsrEyePHnCTDt27BhRUlIiPB6vxj6UlZURXV1dsnfvXmZa9+7dBfpibGxMhg8fLrDcxIkTiZubGyFEvM+jKZJVHhXiyE/EfNzozZs3UVpaCj09PWhqajKvwsJCPHz4sF7bnj9/PkJDQ9G/f38EBwfj1q1bEq+jU6dOMDQ0ZN47OjqCz+fj/v37NS6jpqaGqVOnIjY2FgBw48YN3LlzBz4+PgLtHB0dhd7n5+cDaJjPozlRiAtec3NzcDicOi9qS0tL0b59e2RmZgrN+/bcXFy+vr5wdXXF0aNHceLECURERGDDhg2YN28elJSUhP4wZXlHrK+vL3r06IEnT54gLi4OgwcPhrGxsdjLN8Tn0ZwoxJG/devWcHV1RXR0ND58+CA0v+pO1F69eqG4uBgqKiowMzMTeLVp06be2zcyMsKsWbOQkpKCX375Bb///juAr/egvH//XqBPubm5Qss/fvwYz549Y95nZWVBSUkJlpaWAL7+nC/qp3xbW1v06dMHv//+OxITEzF9+nShNtUv9qveW1lZAWi4z6O5UIjwA0B0dDS+fPkCe3t7JCcn48GDB8jPz8emTZuYf/pdXFzg6OiIsWPH4sSJEygqKsKlS5ewfPlyZGdn12u7/v7+OH78OAoLC3Hjxg1kZGQw4XJwcIC6ujqCgoLw8OFDJCYmIj4+Xmgdampq8Pb2xs2bN3H+/HnMnz8fHh4eMDAwAPD1W5tbt27h/v37ePnypcC/Hr6+vli7di0IIRg3bpzQui9evIj169ejoKAA0dHR2L9/PxYsWNBgn0ez0pgXGA3t2bNnxM/PjxgbGxNVVVViaGhIvv/+e5KRkcG0effuHZk3bx7p0KEDadGiBTEyMiJTpkwhjx8/JoRIfsE7d+5c0qVLF8Llcom+vj6ZOnUqefnyJTM/NTWVmJmZkZYtW5JRo0aR7du3C13w2tnZkc2bN5MOHToQNTU14u7uTl6/fs20ef78ORk6dCjR1NQkAAT25/3790RdXZ3MmTNHqG/GxsZk9erVZMKECURdXZ0YGBiQ//znPwJt6vo8miJZ5VGs+/k/ffqEwsJCmJqaQk1NreH/IimxFRUVoUuXLrh27Rp69eolMM/ExAT+/v7w9/eXT+caiKzyqBAXvGxUUVGBV69eYcWKFfjuu++Egk/VTWHO+dnm4sWLaN++Pa5du4atW7fKuzvNEj3yN1NOTk51/r5RVFTUOJ1ppuiRn2ItGn6KtWj4Kdai4adYi4afYi0afoq1aPgp1pL6e/6knCey6IdYPHt2lHgZHx8fJCQkAABUVFTQunVrdO/eHZ6envDx8RF44GlOTg7Cw8Nx7tw5vH37FkZGRnBycsKSJUtgYWEhs/2gmgZWHPmrSgCLiopw7NgxODs7Y8GCBRg1ahQqKysBAEeOHMF3332H8vJy7N69G/n5+di1axe0tbWxcuVKOe8B1RBY8Qsvl8tlbg82NDREr1698N1332HIkCGIj4/H5MmTMW3aNIwYMQKpqanMcqampnBwcKBPplZQrAi/KIMHD4adnR1SUlKgp6eHly9fYunSpSLbsqGqiY1YcdpTk65du6KoqAgPHjxg3lPswerwE0LA4XDELoCnFAurw5+fnw9TU1Pmmxz6VDd2YW34z5w5g9u3b2P8+PEYNmwY2rRpg/Xr14tsSy94FRMrLnjLy8tR
XFyML1++4J9//kF6ejoiIiIwatQo/Pjjj1BWVsaOHTswYcIEfP/995g/fz7MzMzw8uVL7Nu3D48fP8aePXvkvRuUjLEi/Onp6Wjfvj1UVFSgq6sLOzs7bNq0Cd7e3syPXGPGjMGlS5cQERGByZMn4927dzAyMsLgwYMRGhoq5z2gGgItYKeaHVnlkbXn/BRFw0+xFg0/xVo0/BRr0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsZbU9/aYLDsqi36IpShipMTLVC9gB74OYdS3b1+sX78e3bt3l2X3qGaGFUf+6mPYnj59GioqKhg1apS8u0XJGSvCX1XAbmBggB49eiAwMBB///03Xrx4Ie+uUXLEivBXV1pail27dsHMzAx6enry7g4lR6y4n//IkSPQ1NQEAHz48AHt27fHkSNHBB5YRbEPK/7vOzs7Izc3F7m5ubh69SpcXV3h5uaGR48eybtrlByxIvwaGhrM4Mp9+/bFjh078OHDB2awaIqdWBH+b3E4HCgpKaGsrEzeXaHkiBXn/FUF7ADw5s0b/PbbbygtLcXo0aPl3DNKnlgR/qoCdgDQ0tJC165dsX//fjg5Ocm3Y5Rc0QJ2qtmhBewUJSUafoq1aPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtaS+t+fPokhZ9EMso02WiN2Ww+HUOj84OBghISF1jrpeVFQEU1NTZjlNTU106tQJTk5O8Pf3h7m5ucj1X7x4EYMGDYKNjQ1yc3PF7jfVeBT2yF9VsM7j8bBx40a0atVKYNrixYslGnX91KlT4PF4uHnzJsLDw5Gfnw87OzucPn1aaNslJSX48ccfMWTIkMbaXaoeFPauzqoR1wFAW1sbHA5HYNrHjx8lGnVdT0+PWb5z584YPXo0hgwZghkzZuDhw4dQVlZm2s6aNQuTJ0+GsrIyDh482DA7SElNYY/8dTl+/LhUo64rKSlhwYIFePToEa5fv85Mj4uLw3//+18EBwfLsrtUA2Bt+GUx6nrVskVFRcw6AwMDsWvXLqioKOw/qgqDteGXxajrVevgcDj48uULJk+ejNWrVzODWlNNG2sPT9VHXXd0dKzXOvLz8wF8vU54//49srOzkZOTg7lz5wIA+Hw+CCFQUVHBiRMnMHjwYNl0npIJ1oa/+qjr1S94q5SUlNR63s/n87Fp0yaYmpqiZ8+e4HA4uH37tkCbzZs348yZMzhw4IDA16VU08Da8GtoaEg06vqrV69QXFyMjx8/4s6dO9i4cSOuXr2Ko0ePMt/02NjYCGyjbdu2UFNTE5pONQ2sDT8g2ajrLi4uAAB1dXUYGxvD2dkZ27dvh5mZmTy6TskALWCnmh1awE5RUqLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtWj4Kdai4adYS+p7e/ok2MqiH2LJ9r5dd6P/T14F7Lt378b69evx4MEDaGtrw83NDZGRkXTY0yZIYY/88ihgv3jxIn788UfMmDEDd+/exf79+3H16lXMnDmzsXefEoPC3tUpjwL2y5cvw8TEBPPnz2fW9fPPP2PdunUNuKdUfSnskb8uDVHA7ujoiL///htpaWkghOCff/7BgQMHMGLECFl3n5IB1oa/IQrY+/fvj927d2PixIlQVVWFgYEBtLW1ER0dLXV/KdljbfhlXcAOAHl5eViwYAFWrVqF69evIz09HUVFRZg1a5bU26JkT2HP+esi6wJ2AIiIiED//v2xZMnXxyp2794dGhoaGDBgAEJDQ5nhUKmmgbVH/uoF7KJ8e8H7rW8L2IGvF9FKSoIfaVV9ryz+paFki7VH/oYoYB89ejRmzpyJLVu2wNXVFTweD/7+/rC3t0eHDh3ktatUTYgYysrKSF5eHikrKxOneZMTFxdHtLW1Rc67du0a+eGHH4i+vj7hcrnEzMyM/PTTT+TBgweEEEIKCwsJAOalrq5OrKysyJw5c5g21W3atIlYW1uTli1bkvbt
25MpU6aQJ0+eNOTusY6s8kgL2KlmhxawU5SUaPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtWj4KdaS+sa2yt+3y6IfYlGZ+ZPEy/j4+CAhIQE///wztm7dKjDPz88Pmzdvhre3N+Lj43Hu3DlERkbi+vXr4PF4SE1NxdixY2XUe6qpYcWR38jICHv27EFZWRkz7dOnT0hMTESnTp2YaR8+fICdnR2tvGIJVtzS3KtXLzx8+BApKSmYMmUKACAlJQWdOnUSeCyJm5sb3Nzc5NVNqpGx4sgPANOnT0dcXBzzPjY2FtOmTZNjjyh5Y034vby8cOHCBTx69AiPHj3CxYsX4eXlJe9uUXLEitMeANDX18fIkSMRHx8PQghGjhyJNm3ayLtblByxJvzA11OfqtHR6UUtxarwDx8+HJ8/fwaHw4Grq6u8u0PJGavCr6yszDxupKrovLrS0lL89ddfzPvCwkLk5uaidevWAl+JUopB6vDX54cneWrVqlWN87Kzs+Hs7My8X7RoEQAwP4JRioUWsFPNDi1gpygp0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsRYNP8VaNPwUa9HwU6wl9b09TwyNZNEPsXR8+rfEy0hSwB4REYGUlBTcu3cPLVu2RL9+/bBu3TpYWlrKaheoJoQVR35xC9jPnj0LPz8/ZGVl4eTJk6ioqMCwYcPw4cMHeXSbamCsuKVZ3AL29PR0geXi4+PRtm1bXL9+HQMHDmzUPlMNjxVHfqB+Bexv374FALRu3bpB+0bJB2vCL2kBO5/Ph7+/P/r37w8bG5tG7CnVWFhx2gNIXsDu5+eHO3fu4MKFC43YS6oxsSb8gPgF7HPnzsWRI0dw7tw5dOzYsbG6RzUyVoW/rgJ2QgjmzZuH1NRUZGZmClwMU4qHVeGvq4Ddz88PiYmJOHToELS0tFBcXAwA0NbWRsuWLRu1r1TDkzr89fnhSZ5qK2DfsmULAMDJyUlgelxcHHx8fBqwV5Q80AJ2qtmhBewUJSUafoq1aPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtaS+t4ef6S+DbohHyWmj2G05HE6t84ODgxESEiJdh6hmTWHv6uTxeMx/7927F6tWrcL9+/eZaZqamsx/E0Lw5csXqKgo7MdBiaCwpz0GBgbMS1tbGxwOh3l/7949aGlp4dixY+jduze4XC4uXLgAPp+PiIgImJqaomXLlrCzs8OBAwcE1nv48GGYm5tDTU0Nzs7OSEhIAIfDQUlJiXx2lKo3Vh/qAgMD8e9//xudO3eGrq4uIiIisGvXLmzduhXm5uY4d+4cvLy8oK+vj0GDBqGwsBDu7u5YsGABfH19kZOTg8WLF8t7N6h6YnX416xZg6FDhwIAysvLER4ejlOnTsHR0REA0LlzZ1y4cAHbtm3DoEGDsG3bNlhaWiIyMhIAYGlpiTt37iAsLExu+0DVH6vD36dPH+a///rrL3z8+JH5Y6jy+fNn9OzZEwBw//599O3bV2C+vb19w3eUahCsDr+Ghgbz36WlpQCAo0ePwtDQUKAdl8tt1H5RjYPV4a/O2toaXC4Xjx8/xqBBg0S2sbS0RFpamsC0a9euNUb3qAZAw///aWlpYfHixVi4cCH4fD7+9a9/4e3bt7h48SJatWoFb29v/Pzzz/if//kfBAQEYMaMGcjNzWUGp67rdwWqCSJiKCsrI3l5eaSsrEyc5k1OXFwc0dbWZt5nZGQQAOTNmzcC7fh8Ptm4cSOxtLQkLVq0IPr6+sTV1ZWcPXuWaXPo0CFiZmZGuFwucXJyIlu2bCEAmu1n0xzJKo+0gF1KYWFh2Lp1K/7+u3k9xaI5k1Ue6WmPhDZv3oy+fftCT08PFy9eRGRkJPMUOKp5oeGX0IMHDxAaGorXr1+jU6dO+OWXX7Bs2TJ5d4uqB3raQzU79Lk9FCUlGn6KtWj4Kdai4adYi4afYi0afoq1aPgp1pI+
/CGcxnvVg4+PD8aOHcv8N4fDwaxZs4Ta+fn5gcPhCAxCIWl7Jycn+Pv7C7WNj4+Hjo7O/31kISEi15ubmwsOh4OioiIAQFFRETgcjshXVlaWWPtfVlaG4OBgWFhYgMvlok2bNpgwYQLu3r3LtDExMalxO9X3kcPh4ODBg0LbqP4ZV70XtZ7hw4eL3Ka6ujpsbW2xY8cOsfZJVlh35Bd3NPb6theXmpoaYmJi8ODBgzrbnjp1CjweT+DVu3fvOpcrLy+Hi4sLYmNjERoaioKCAqSlpaGyshIODg7MH9C1a9eY9SYnJwP4WrhTNe0///mPxPs3fPhwoT4nJSUJtFmzZg14PB7u3LkDLy8vzJw5E8eOHZN4W/XFuvD36tULRkZGSElJYaZVjcZeVbElTXtxWVpawtnZGcuXL6+zrZ6enkBBvoGBAVq0aFHnchs3bsTly5dx5MgReHh4wNjYGPb29khOToaVlRVmzJgBQgj09fWZ9VYNuN22bVuBBwBIisvlCvVZV1dXoI2WlhYMDAzQuXNnBAQEoHXr1jh58qTE26ov1oUfkHw09vqM3i6OtWvXIjk5GdnZ2VKvS5TExEQMHToUdnZ2AtOVlJSwcOFC5OXl4ebNmw2ybUnw+XwkJyfjzZs3UFVVbbTtsjL8ko7GLml7cfXq1QseHh4ICAiotV2/fv2gqakp8BJHQUEBrKysRM6rml5QUCBRnz09PYX6snv3bqF2R44cEWoXHh4u0CYgIACamprgcrlwd3eHrq4ufH19JeqPNFh5V6eko7FL2l4SoaGhsLKywokTJ9C2bVuRbfbu3VtjiOsixn2LEomKioKLi4vAtICAAHz58kVgmrOzMzO6ZZWqU6oqS5YsgY+PD3g8HpYsWYI5c+bAzMxMpv2tDSvDD4g/Grsk7Vu1aoW3b98KTS8pKanxvLlLly6YOXMmAgMDERMTI7KNkZFRvUJhYWHBjDv8rarpFhYWEq3TwMBAqC9aWlpCD+3S0NCos89t2rSBmZkZzMzMsH//ftja2qJPnz6wtraWqE/1xcrTHuD/RmOvqKgQORp7fdpbWlrixo0bQtNv3LhRa8hWrVqFgoIC7NmzR/wdEMOkSZNw6tQpofN6Pp+PqKgoWFtbC10PyIuRkREmTpzYqLURrD3y1zUae33az549G7/99hvmz58PX19fcLlcHD16FElJSfjzzz9rXHe7du2waNEi5mFY33r16hUzGnwVHR2dOu9lX7hwIQ4dOoTRo0djw4YNcHBwwD///IPw8HDk5+fj1KlTDVZ4X15eLtRnFRWVWk8XFyxYABsbG2RnZws8U6mhSB/+ENmeUzam2kZjr0/7zp0749y5c1i+fDlcXFzw+fNndO3aFfv37xf4gUeUxYsXY8uWLfj06ZPQvG/PsQEgKSkJkyZNqnWdampqOHPmDMLDwxEUFIRHjx5BS0sLzs7OyMrKgo2NTa3LSyM9PR3t27cXmGZpaYl79+7VuIy1tTWGDRuGVatWCT0ipiHQSi6q2aGVXBQlJRr+Zqxbt25C36XX9t07JYi1F7yKIC0tDRUVFSLntWvXrpF70/zQ8DdjxsbG8u5Cs0ZPeyjWouGnWIuGn2ItGn6KtWj4Kdai4adYS+qvOq/9KfqW2YbQd3T97mkvLi5GWFgYjh49iqdPn6Jt27bo0aMH/P39MWTIEKZdREQEVqxYgbVr12LJkiUC64iPj8e0adPg6uqK9PR0ZnpJSQl0dXWRkZEBJycnZnpGRgYiIyNx5coVlJWVwcTEBG5ubli0aBEMDQ2RmZkJZ2dnkf3l8XgwMDBASEgIVq9eDeBr9VWHDh3g5uaGtWvXCt0bT0lO4Y/8RUVF6N27N86cOYPIyEjcvn0b6enpcHZ2hp+fn0Db2NhYLF26FLGxsSLXpaKiglOnTiEjI6PWbW7btg0uLi4wMDBAcnIy8vLysHXrVrx9+xYbNmwQaFu9ULzqVb2opVu3buDxeHj8+DHi4uKQnp6O2bNn
1/PToKpT+B+55syZAw6Hg6tXrwqMvtitWzdMnz6deX/27FmUlZVhzZo12LlzJy5duoR+/foJrEtDQwMeHh4IDAzElStXRG7vyZMnmD9/PubPn4+oqChmuomJCQYOHChU9NG2bVuBx5p8S0VFBQYGBgAAQ0NDTJgwQaCemKo/hT7yv379Gunp6fDz8xMIfpXqoYuJiYGnpydatGgBT0/PGquqQkJCcPv2bRw4cEDk/P379+Pz589YunSpyPm1Bb0uRUVFOH78eKMWeSsyhQ7/X3/9BUIIunbtWmu7d+/e4cCBA0xRupeXF/bt28eMzVtdhw4dsGDBAixfvhyVlZVC8x88eIBWrVoJ3ctek44dOwrckNatWzeB+bdv34ampiZatmwJU1NT3L17t86Cd0o8Ch1+cYu3k5KS0KVLF6akr0ePHjA2NsbevXtFtg8ICMCLFy9EXhsQQiSqjjp//jxyc3OZ17dFHJaWlsjNzcW1a9cQEBAAV1dXzJs3T+z1UzVT6PCbm5uDw+HUWj0EfD3luXv3LlRUVJhXXl5ejRe+Ojo6WLZsGVavXo2PHz8KzLOwsMDbt2/B4/HE6qOpqSlTxG1mZiZ0s5qqqirMzMxgY2ODtWvXQllZmfkGiJKOQoe/devWcHV1RXR0ND58+CA0v6SkBLdv30Z2djYyMzMFjsCZmZm4fPlyjX848+bNg5KSktCj/Nzd3aGqqor169eLXO7bC15JrVixAv/+97/x7NkzqdZDseDbnujoaPTv3x/29vZYs2YNunfvjsrKSpw8eRJbtmyBq6sr7O3tMXDgQKFl+/bti5iYGJGF5Wpqali9erXQ16VGRkaIiorC3Llz8e7dO/z4448wMTHBkydPsHPnTmhqagp83fn8+XOhul09Pb0aH0fo6OiI7t27Izw8HL/99lt9PhKqSmOOeC0vz549I35+fsTY2JioqqoSQ0ND8v3335Pjx48TPT09sn79epHLrVu3jrRt25Z8/vxZaBR3QgiprKwk1tbWBADJyMgQmHfy5Eni6upKdHV1iZqaGunatStZvHgxefbsGSHk/0aBF/W6fPkyIYSQ4OBgYmdnJ9SvpKQkwuVyyePHj6X+bJojOgI7xVq0gJ2ipETDT7EWDT/FWjT8FGvR8FOsRcNPsRYNP8VaNPwUa9HwU6xFw0+xltQ3tvkN/F0W/RBL9LmZMl/nx48f8euvv2Lfvn14+vQptLS0YG1tjUWLFmHMmDEAvo6sfvbsWURERCAwMFBg+ZEjRyItLQ3BwcEICQkRmJeUlAQvLy/MmjVLrHG/GsO7d++wbt06JCcno6ioCDo6OrCxscGcOXMwbtw4gVqE2vpfUwH+8uXLERoa2uD7IQusP/LPmjULKSkp+N///V/cu3cP6enpcHd3x6tXrwTaGRkZIT4+XmDa06dPcfr06RqrtmJiYrB06VIkJSWJHHGlsZWUlKBfv37YuXMnli1bhhs3buDcuXOYOHEili5dKjSYnjj9/7YA/9uDQ1Om8OEvLS3FtGnToKWlhXbt2iEyMhJPnz6Furo6SktLcfjwYQQFBWHEiBEwMTFB7969MW/ePIHidgAYNWoUXr58iYsXLzLTEhISMGzYMJFDiBYWFuLSpUsIDAyEhYWFwAjudYmPj4eOjg6OHDkCS0tLqKurw93dHR8/fkRCQgJMTEygq6uL+fPnM0OArlmzRuQwQz169MDKlSsBAEFBQSgqKsKVK1fg7e0Na2trWFhYYObMmcjNzRUY31fc/lcfqd3AwEDsMYKbAoUPv4+PDy5duoTMzEzExcVh5cqVCAoKgouLCzQ1NWFgYIC0tDS8f/++1vWoqqpiypQpAk9OiI+PF/ojqRIXF4eRI0dCW1sbXl5eNRbE1+Tjx4/YtGkT9uzZg/T0dGRmZmLcuHFIS0tDWloa/vjjD2zbto0ppJ8+fTry8/Nx7do1Zh05OTm4desWpk2bBj6fjz179mDKlCno0KGD0PY0NTWhovJ/Z8HS
9r85UOjwv3z5EikpKQgJCUHv3r0xYsQIjBs3Djt37sTYsWMBANu3b8elS5egp6eHvn37YuHChQJH9+qmT5+Offv24cOHDzh37hzevn2LUaNGCbXj8/mIj49nCuInTZqECxcuoLCwUOy+V1RUYMuWLejZsycGDhwId3d3XLhwATExMbC2tsaoUaPg7OzMPEOoY8eOcHV1FfjjjIuLw6BBg9C5c2e8fPkSb968qbOYX9L+f1uA/+3pYlOm0OGvenqDo6MjM83e3h7Kysr4/vvvAQADBw7Ef//7X5w+fRru7u64e/cuBgwYgF9//VVofXZ2djA3N8eBAwcQGxuLqVOnChwtq5w8eRIfPnzAiBEjAHwdbHno0KE11gSLoq6uji5dujDv27VrBxMTE4HTinbt2uH58+fM+5kzZzLn558/f0ZiYiLzL5MYZRv16v+3Bfi6urpib0feFLqMkcvlAoDAc2709fVhYWEhMB5sixYtMGDAAAwYMAABAQEIDQ3FmjVrEBAQIPSMnOnTpyM6Ohp5eXm4evWqyO3GxMTg9evXaNmyJTONz+fj1q1bWL16NZSU6j7mfFvGyOFwRE7j8/nM+9GjR4PL5SI1NRWqqqqoqKiAu7s7s986Ojp1FvNL2n9TU1OpnkUkTwp95Dc1NYWSkhIePHjATDt8+DAeP35c65HQ2toalZWVIr/hmDx5Mm7fvg0bGxtYW1sLzX/16hUOHTqEPXv2CBwRc3Jy8ObNG5w4cUI2OyeCiooKvL29ERcXh7i4OEyaNIkJsJKSEiZNmoTdu3eLLH4vLS1FZWWlXPvf2BT6yK+jo4MffvgBYWFhsLe3R0FBAdLT09GyZUucOXMGQ4YMgZOTEzw9PdGnTx/o6ekhLy8PQUFBcHZ2FjnotK6uLng8Xo0F5n/88Qf09PTg4eEh9PyeESNGICYmps4BqaXh6+sLK6uvD/T99tolLCwMmZmZcHBwQFhYGPr06YMWLVrg/PnziIiIwLVr1+Te/8Ykdfgb4ocnWYqOjoavry8MDQ2hrKyMjRs3gsvlYsqUKQgLC4OrqysSEhIQFBSEjx8/okOHDhg1ahRWrVpV4zpr+2c+NjZW6MeiKuPHj8fUqVPx8uVLgdMuWTI3N0e/fv3w+vVrODg4CMxr3bo1srKysHbtWoSGhuLRo0fQ1dWFra0tIiMjoa2tLXb/FQEtYFcwhBCYm5tjzpw5WLRokby70yBklUeFPu1hmxcvXmDPnj0oLi7GtGnT5N2dJk+hL3ibKjc3txpHTg8PD6/3etu2bYs1a9Zg+/btzeorR3mhR3452LFjB8rKykTOk2bEFUm+y6do+OXC0NBQ3l2gQE97KBaj4adYi4afYi0afoq1aPgp1qLhp1hLBuHf3ogvyb148QKzZ89Gp06dwOVyYWBgAFdXV4SFhYHD4dT6yszMRHx8PPNeSUkJ7du3x8SJE/H48WOhbd29exceHh7Q19cHl8uFhYUFVq1aJTRul4mJCTgcDvbs2SO0jm7duoHD4QjVCwNfR4hXVlYWOVJMXcrKyhAcHAwLCwtwuVy0adMGEyZMwN27d4X6VdPLx8cHwNdbqQ8ePCi0DR8fH6ZIqOq9qPVUvzGu+jbV1dVha2uLHTt2SLx/9aHwR/7x48cjJycHCQkJKCgowOHDh+Hk5ARbW1uBwmsPDw8MHz5cYFrVINStWrUCj8fD06dPkZycjPv372PChAkC28nKyoKDgwM+f/6Mo0ePoqCgAGFhYYiPj8fQoUPx+fNngfZGRkZCg0lnZWWhuLhY5JjBQN0jxNekvLwcLi4uiI2NRWhoKAoKCpCWlobKyko4ODggKysLAHDt2jVm35OTkwEIFqh/O/6YOL79THk8HpKSkgTarFmzBjweD3fu3IGXlxdmzpyJY8eOSbwtSSn0j1wlJSU4f/48MjMzMWjQIACAsbEx7O3thdq2bNkS5eXlzGjn1XE4HGZ6+/btMWPGDMyfPx/v3r1Dq1atQAjBjBkz
YGVlhZSUFKbYw9jYGBYWFujZsyeioqIExs+dMmUKoqKi8Pfff8PIyAjA13BPmTIFO3fuFOqDOCPE12Tjxo24fPkycnJymOFWjY2NkZycDAcHB8yYMQN37tyBvr4+s0zVL811jRBfl6p/bWujpaXFtAkICMD69etx8uRJuLm51Xu74lDoI3/V/TIHDx5EeXm5TNb5/PlzpKamQllZGcrKygCA3Nxc5OXlYdGiRUJVWnZ2dnBxcRE62rVr1465nRr4WrC+d+/eGgvixR0hXpTExEQMHTqUCX4VJSUlLFy4EHl5ebh586bY62sofD4fycnJePPmTaOMMq/Q4VdRUUF8fDwSEhKgo6OD/v37IygoCLdu3ZJoPW/fvoWmpiY0NDTQrl07ZGRkwM/Pjzk9KSgoAACmiORbVlZWTJvqpk+fjvj4eBBCcODAAXTp0gU9evQQaifJCPGiFBQU1Nq36vsgLk9PT6Gb8nbv3i3U7siRI3XevBcQEABNTU1wuVy4u7tDV1cXvr6+EvWnPhQ6/MDXc/5nz57h8OHDGD58ODIzM9GrVy+RF5Q10dLSQm5uLrKzs7Fhwwb06tULYWFhQu0kvbFs5MiRKC0txblz5xAbG1vjUV/SEeJFkfVNb1FRUQJljrm5ucxDAapzdnYWajdr1iyBNkuWLEFubi7OnDkDBwcHREVFwczMTKb9FUWhz/mrqKmpYejQoRg6dChWrlwJX19fBAcHM99e1EVJSYn5n2FlZYWHDx9i9uzZ+OOPPwB8HXUdAPLz89GzZ0+h5fPz85k21amoqGDq1KkIDg7GlStXkJqaKnL71UeIr8Ln8xEbG4sZM2bU2X8LCwvk5+eLnFc1XVT/amNgYCAUUC0tLaFBtjU0NOoMcps2bZgR6Pfv3w9bW1v06dNHZI20LCn8kV8Ua2trkSOyiyswMBB79+7FjRs3AHw9Enft2hVRUVECT1MAgJs3b+LUqVPw9PQUua7p06fj7NmzGDNmjMh78Os7Qnx1kyZNwqlTp4TO6/l8PqKiomBtbS10PSAvRkZGmDhxIpYtW9bg21LoI/+rV68wYcIETJ8+Hd27d4eWlhays7Oxfv165iG09WFkZIRx48Zh1apVOHLkCDgcDmJiYjB06FCMHz8ey5Ytg4GBAa5cuYJffvkFjo6O8Pf3F7kuKysrvHz5Eurq6iLnx8TE1GuE+OoWLlyIQ4cOYfTo0diwYQMcHBzwzz//IDw8HPn5+Th16pTIml1ZKC8vR3FxscA0FRWVWmuYFyxYABsbG2RnZ6NPnz4N0i9AJuH/SfpVNBBNTU3mHPLhw4eoqKiAkZERZs6ciaCgIKnWvXDhQjg6OuLq1auwt7dHv379kJWVhdWrV8PNzQ3v379Hp06d4O3tjWXLljHPEBJFT09P5PTPnz9j165dAl+RVjd+/Hhs2LAB4eHhNT5NAvh62nfmzBmEh4cjKCgIjx49gpaWFpydnZGVlSXyGZ+ykp6eLvQgX0tLy1r/xbK2tsawYcOwatUqpKWlNVjfaAE71ezQEdgpSko0/AqgW7duNRbEi/runfpKoS942SItLQ0VFRUi57Vr166Re9N80PArAGNjY3l3oVmipz0Ua9HwU6xFw0+xFg0/xVo0/BRr0fBTrCX1V52HJ4sukmgI3yeKvi23Nj4+Pky1VHWurq5IT0+HiYkJHj16hMuXL+O7775j5vv7+zN3TwJASEgIVq9eDQBQVlaGjo4OrK2t8cMPP2D27NlC9+7cvXsXq1evRkZGBt69ewdjY2NMmjQJgYGBUFdXr3EE8+oyMjJQVFQEf39/oVuFga/llampqUzReE03pyUlJWHSpEnMNt+8eSOyNDEkJAQHDx5Ebm6uwPQnT56gc+fOsLCwwJ07d+rsR3PBiiN/XUXUampqNd48Vl23bt3A4/Hw+PFjZGRkYMKECYiIiEC/fv0ExvEVp5i9X79+YhfQSyIuLk5oX6UNZXx8PDw8PPDu3TtcuXJFqnU1Jaz4
kauuIuqffvoJW7duRVpaGjP8pigqKirMejp06ABbW1umNnbdunUIDQ2VqJi9ep9qK6CXhI6OjtTrqI4Qgri4OGzevBkdO3ZETEyM0HBHzRUrjvx1MTU1xaxZs7Bs2TKhYpS6dO3aFW5ubkhJSQFQv2L2piwjIwMfP36Ei4sLvLy8sGfPHqkKgZoSVoRfnCLqFStWoLCwsF43gnXt2hVFRUUA6l/MXpuqAvpvX6KIKiwX9YAtccXExGDSpElQVlaGjY0NOnfujP3799d7fU0JK057nJ2dsWXLFoFp346Aoq+vj8WLF2PVqlWYOHGiROsnhAhdbMqyYFxLS4spmazO3NxcaFpUVBRcXFwEpnXo0KFe2y0pKUFKSgouXLjATPPy8kJMTIzY9c9NGSvCL04RNQAsWrQImzdvxubNmyVaf35+PkxNTQHUv5i9NtUL6OsiqrC8vhITE/Hp0yeBc3xCCPh8PgoKCiTej6aGFac94tLU1MTKlSsRFhYm8O1Nbe7du4f09HSMHz8egHTF7E1NTEwMfvnlF4HC+Zs3b2LAgAESPzKxKWLFkV+SIuqffvoJUVFRSExMFPpWo7KyEsXFxeDz+Xj16hUyMzMRGhqKHj16YMmSJQAgVTG7LJSUlAjtq5aWlsDzP2/fvg0tLS3mPYfDEXp6Q25uLm7cuIHdu3eja9euAvM8PT2xZs0ahIaGMo9TKSwsFPp9wNzcvMbnjjYJRAxlZWUkLy+PlJWVidO8SfH29iYAhF6WlpaEEEKMjY1JVFSUwDKJiYkEABk0aBAzLTg4mFlWWVmZtG7dmvzrX/8iUVFR5NOnT0LbvXXrFhk/fjxp3bo1adGiBenSpQtZsWIF+fDhQ439HDNmjND0uLg4oq2tLXIZACQ1NVXgvahXREQEIYSQjIwMkfOVlZWZfbSzsyOEEDJ37lxibW0tcrs8Ho8oKSmRQ4cO1brd8+fPi1xeWrLKIy1gp5odWsBOUVKi4adYi4afYi0afoq1JAq/GNfGFNXgZJVDscJf9RzIbwdWoyh5qBrfrGpknPoS60euquKN58+fAwDU1dUb7Km+FFUbPp+PFy9eQF1dXWC8gvoQe+mqe8Sr/gAoSl6UlJTQqVMnqQ/AYv3IVd2XL19qfDQeRTUGVVVVoVqJ+pA4/BSlKOhXnRRr0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsdb/A22CHYqSLKorAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Legend of cell subtype info only\n", "# Zero-height bars are drawn on a hidden axis purely to get one colored\n", "# legend handle per cell subtype.\n", "g = plt.figure(figsize = (1,1)).add_subplot(111)\n", "g.axis('off')\n", "handles = []\n", "for subtype, subtype_color in cellsubtype_color_dict.items():\n", "    h = g.bar(0, 0, color = subtype_color,\n", "              label = subtype, linewidth = 0)\n", "    handles.append(h)\n", "# No trailing comma here: it would turn first_legend into a 1-tuple instead of the Legend object\n", "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell subtype')\n", "\n", "# Save the legend image to the metadata images directory\n", "filename = os.path.join(metadata_images_dir, \"Cellsubtype_legend.png\")\n", "plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "markdown", "id": "3c0e92c5-e026-4868-a5f3-20cee185d893", "metadata": {}, "source": [ "## II.7. IMMUNE CHECKPOINT COLORS" ] }, { "cell_type": "code", "execution_count": 68, "id": "e4b41aab-bdd0-41b0-a39e-0fe3e74fe830", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nuc_XSample_IDNucleus_RoundnessNuc_Y_InvROI_indexCell_SizeNucleus_SizeAF488_Cell_Intensity_AverageAF488_Cytoplasm_Intensity_AverageAF488_Nucleus_Intensity_Average...r8c2_Nucleus_Intensity_AverageSting_Cell_Intensity_AverageSting_Cytoplasm_Intensity_AverageSting_Nucleus_Intensity_AverageVimentin_Cell_Intensity_AverageVimentin_Cytoplasm_Intensity_AverageVimentin_Nucleus_Intensity_Averagecell_typecell_subtypeimmune_checkpoint
ID
DD3S3_Cell_0823.567871DD3S3.csv0.83532415699.3828120281816.6544966.2771677.586173...11.7856791.3585621.3188351.45665419.08168516.42890025.631769nonenonenone
DD3S3_Cell_1852.840027DD3S3.csv0.52342115690.5332030200758.4106678.1669608.816844...13.0325331.8560051.9543761.69205320.05023320.90330718.628444nonenonenone
DD3S3_Cell_2868.272705DD3S3.csv0.68614715682.99414104251659.5478209.21682010.069394...14.5929701.9695532.1713041.65164213.03458813.99384611.523031nonenonenone
DD3S3_Cell_4704.337280DD3S3.csv0.75762315683.059570041816911.16341310.06294512.784813...17.3259171.3984761.3019321.54072212.41067011.19111114.207534nonenonenone
DD3S3_Cell_5852.893799DD3S3.csv0.71465115683.01757802011137.8871146.9623868.607257...13.0637171.3286021.1446821.4718329.7181758.03901511.025841nonenonenone
..................................................................
DD3S2_Cell_9124111890.440430DD3S2.csv0.6543411231.699951451855011.22288211.6902479.961000...13.3104002.1548592.3322441.67592011.00108111.4422719.809867nonenonenone
DD3S2_Cell_9124311915.123047DD3S2.csv0.8542571228.222168452038110.50607610.9072959.901769...13.5925932.3562562.4322052.24186410.54541910.82595610.122881nonenonenone
DD3S2_Cell_9124411961.339844DD3S2.csv0.6515631230.939941451575011.33730411.76953310.412333...13.6800002.1767072.1055612.32896011.36407611.82859810.370000nonenonenone
DD3S2_Cell_9124511969.869141DD3S2.csv0.7645021230.717407451154610.03979710.5130929.329855...12.8521742.3463562.5265512.07606511.52382612.6726579.800579nonenonenone
DD3S2_Cell_9124711965.208984DD3S2.csv0.8730941224.179077451466710.22598110.38818610.034727...13.6137322.2129382.2295952.19329916.50438321.88607610.158806nonenonenone
\n", "

350554 rows × 118 columns

\n", "
" ], "text/plain": [ " Nuc_X Sample_ID Nucleus_Roundness Nuc_Y_Inv \\\n", "ID \n", "DD3S3_Cell_0 823.567871 DD3S3.csv 0.835324 15699.382812 \n", "DD3S3_Cell_1 852.840027 DD3S3.csv 0.523421 15690.533203 \n", "DD3S3_Cell_2 868.272705 DD3S3.csv 0.686147 15682.994141 \n", "DD3S3_Cell_4 704.337280 DD3S3.csv 0.757623 15683.059570 \n", "DD3S3_Cell_5 852.893799 DD3S3.csv 0.714651 15683.017578 \n", "... ... ... ... ... \n", "DD3S2_Cell_91241 11890.440430 DD3S2.csv 0.654341 1231.699951 \n", "DD3S2_Cell_91243 11915.123047 DD3S2.csv 0.854257 1228.222168 \n", "DD3S2_Cell_91244 11961.339844 DD3S2.csv 0.651563 1230.939941 \n", "DD3S2_Cell_91245 11969.869141 DD3S2.csv 0.764502 1230.717407 \n", "DD3S2_Cell_91247 11965.208984 DD3S2.csv 0.873094 1224.179077 \n", "\n", " ROI_index Cell_Size Nucleus_Size \\\n", "ID \n", "DD3S3_Cell_0 0 281 81 \n", "DD3S3_Cell_1 0 200 75 \n", "DD3S3_Cell_2 0 425 165 \n", "DD3S3_Cell_4 0 418 169 \n", "DD3S3_Cell_5 0 201 113 \n", "... ... ... ... \n", "DD3S2_Cell_91241 45 185 50 \n", "DD3S2_Cell_91243 45 203 81 \n", "DD3S2_Cell_91244 45 157 50 \n", "DD3S2_Cell_91245 45 115 46 \n", "DD3S2_Cell_91247 45 146 67 \n", "\n", " AF488_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 6.654496 \n", "DD3S3_Cell_1 8.410667 \n", "DD3S3_Cell_2 9.547820 \n", "DD3S3_Cell_4 11.163413 \n", "DD3S3_Cell_5 7.887114 \n", "... ... \n", "DD3S2_Cell_91241 11.222882 \n", "DD3S2_Cell_91243 10.506076 \n", "DD3S2_Cell_91244 11.337304 \n", "DD3S2_Cell_91245 10.039797 \n", "DD3S2_Cell_91247 10.225981 \n", "\n", " AF488_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 6.277167 \n", "DD3S3_Cell_1 8.166960 \n", "DD3S3_Cell_2 9.216820 \n", "DD3S3_Cell_4 10.062945 \n", "DD3S3_Cell_5 6.962386 \n", "... ... \n", "DD3S2_Cell_91241 11.690247 \n", "DD3S2_Cell_91243 10.907295 \n", "DD3S2_Cell_91244 11.769533 \n", "DD3S2_Cell_91245 10.513092 \n", "DD3S2_Cell_91247 10.388186 \n", "\n", " AF488_Nucleus_Intensity_Average ... \\\n", "ID ... \n", "DD3S3_Cell_0 7.586173 ... 
\n", "DD3S3_Cell_1 8.816844 ... \n", "DD3S3_Cell_2 10.069394 ... \n", "DD3S3_Cell_4 12.784813 ... \n", "DD3S3_Cell_5 8.607257 ... \n", "... ... ... \n", "DD3S2_Cell_91241 9.961000 ... \n", "DD3S2_Cell_91243 9.901769 ... \n", "DD3S2_Cell_91244 10.412333 ... \n", "DD3S2_Cell_91245 9.329855 ... \n", "DD3S2_Cell_91247 10.034727 ... \n", "\n", " r8c2_Nucleus_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 11.785679 \n", "DD3S3_Cell_1 13.032533 \n", "DD3S3_Cell_2 14.592970 \n", "DD3S3_Cell_4 17.325917 \n", "DD3S3_Cell_5 13.063717 \n", "... ... \n", "DD3S2_Cell_91241 13.310400 \n", "DD3S2_Cell_91243 13.592593 \n", "DD3S2_Cell_91244 13.680000 \n", "DD3S2_Cell_91245 12.852174 \n", "DD3S2_Cell_91247 13.613732 \n", "\n", " Sting_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1.358562 \n", "DD3S3_Cell_1 1.856005 \n", "DD3S3_Cell_2 1.969553 \n", "DD3S3_Cell_4 1.398476 \n", "DD3S3_Cell_5 1.328602 \n", "... ... \n", "DD3S2_Cell_91241 2.154859 \n", "DD3S2_Cell_91243 2.356256 \n", "DD3S2_Cell_91244 2.176707 \n", "DD3S2_Cell_91245 2.346356 \n", "DD3S2_Cell_91247 2.212938 \n", "\n", " Sting_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1.318835 \n", "DD3S3_Cell_1 1.954376 \n", "DD3S3_Cell_2 2.171304 \n", "DD3S3_Cell_4 1.301932 \n", "DD3S3_Cell_5 1.144682 \n", "... ... \n", "DD3S2_Cell_91241 2.332244 \n", "DD3S2_Cell_91243 2.432205 \n", "DD3S2_Cell_91244 2.105561 \n", "DD3S2_Cell_91245 2.526551 \n", "DD3S2_Cell_91247 2.229595 \n", "\n", " Sting_Nucleus_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 1.456654 \n", "DD3S3_Cell_1 1.692053 \n", "DD3S3_Cell_2 1.651642 \n", "DD3S3_Cell_4 1.540722 \n", "DD3S3_Cell_5 1.471832 \n", "... ... 
\n", "DD3S2_Cell_91241 1.675920 \n", "DD3S2_Cell_91243 2.241864 \n", "DD3S2_Cell_91244 2.328960 \n", "DD3S2_Cell_91245 2.076065 \n", "DD3S2_Cell_91247 2.193299 \n", "\n", " Vimentin_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 19.081685 \n", "DD3S3_Cell_1 20.050233 \n", "DD3S3_Cell_2 13.034588 \n", "DD3S3_Cell_4 12.410670 \n", "DD3S3_Cell_5 9.718175 \n", "... ... \n", "DD3S2_Cell_91241 11.001081 \n", "DD3S2_Cell_91243 10.545419 \n", "DD3S2_Cell_91244 11.364076 \n", "DD3S2_Cell_91245 11.523826 \n", "DD3S2_Cell_91247 16.504383 \n", "\n", " Vimentin_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 16.428900 \n", "DD3S3_Cell_1 20.903307 \n", "DD3S3_Cell_2 13.993846 \n", "DD3S3_Cell_4 11.191111 \n", "DD3S3_Cell_5 8.039015 \n", "... ... \n", "DD3S2_Cell_91241 11.442271 \n", "DD3S2_Cell_91243 10.825956 \n", "DD3S2_Cell_91244 11.828598 \n", "DD3S2_Cell_91245 12.672657 \n", "DD3S2_Cell_91247 21.886076 \n", "\n", " Vimentin_Nucleus_Intensity_Average cell_type cell_subtype \\\n", "ID \n", "DD3S3_Cell_0 25.631769 none none \n", "DD3S3_Cell_1 18.628444 none none \n", "DD3S3_Cell_2 11.523031 none none \n", "DD3S3_Cell_4 14.207534 none none \n", "DD3S3_Cell_5 11.025841 none none \n", "... ... ... ... \n", "DD3S2_Cell_91241 9.809867 none none \n", "DD3S2_Cell_91243 10.122881 none none \n", "DD3S2_Cell_91244 10.370000 none none \n", "DD3S2_Cell_91245 9.800579 none none \n", "DD3S2_Cell_91247 10.158806 none none \n", "\n", " immune_checkpoint \n", "ID \n", "DD3S3_Cell_0 none \n", "DD3S3_Cell_1 none \n", "DD3S3_Cell_2 none \n", "DD3S3_Cell_4 none \n", "DD3S3_Cell_5 none \n", "... ... 
\n", "DD3S2_Cell_91241 none \n", "DD3S2_Cell_91243 none \n", "DD3S2_Cell_91244 none \n", "DD3S2_Cell_91245 none \n", "DD3S2_Cell_91247 none \n", "\n", "[350554 rows x 118 columns]" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Assign IMMUNE SUBTYPES\n", "df['cell_subtype'] = df['cell_type'].copy()\n", "df['immune_checkpoint'] = 'none'\n", "df" ] }, { "cell_type": "raw", "id": "984f9fe2-41d1-4c00-be20-e2d6ca9d2394", "metadata": {}, "source": [ "immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']\n", "color_values = sb.color_palette(\"husl\",n_colors=len(immune_checkpoint))\n", "# each color value is a tuple of three values: (R, G, B)\n", "\n", "print(\"Unique immune checkpoint are:\",df.immune_checkpoint.unique())\n", "# Display those unique colors\n", "sb.palplot(sb.color_palette(color_values))" ] }, { "cell_type": "code", "execution_count": 69, "id": "3b593828-a016-4d52-b722-9908aac4e2d1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unique immune checkpoint combinations are: ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZcAAABlCAYAAACBS66rAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAADBUlEQVR4nO3aPW4TYRSG0WsT4oBkW0pp2VV6dsBmYBEgUaShZydIrAcpRUZyB0WMwPwkQ4GgwjC23miYcE47U1xdW348/jxq27YtAAga9z0AAHePuAAQJy4AxIkLAHHiAkCcuAAQJy4AxB11uenm5qbW63VNp9MajUa3PRMA/6i2bWuz2dRisajxePfzSae4rNfrWq1WseEAGLamaWq5XO683iku0+m0qqrePn1e0+OTzGT/gSePX/c9wiC9+/Cm7xEG6en7Z32PMDgX9x/1PcLgfN5u69WLl7+6sEunuPz8KWx6fFKzibh0df/hvb5HGKSj6z+/afm9ByfHfY8wOCe+LB/sb0ckDvQBiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiDvqclPbtlVVtfmyvdVh7pqvH6/7HmGQvn3a9D3CIH3aful7hMHZXvtM29fn7Y+d/ezCLqP2b3dU1cXFRZ2dnWUmA2Dwmqap5XK583qnJ5fT09Oqqrq8vKz5fJ6Z7D9wdXVVq9Wqmqap2WzW9ziDYGeHsbf92dlh2ratzWZTi8Xij/d1ist4/ONoZj6fexEOMJvN7G1PdnYYe9ufne2vy0OGA30A4sQFgLhOcZlMJnV+fl6TyeS257lT7G1/dnYYe9ufnd2uTv8WA4B9+FkMgDhxASBOXACIExcA4sQFgDhxASBOXACIExcA4r4DOnx2ZebrFYYAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']\n", "\n", "# Base colors for the primary checkpoints\n", "base_colors = sb.color_palette(\"husl\", n_colors=3) # Three distinct colors\n", "\n", "# Function to mix two RGB colors\n", "def mix_colors(color1, color2):\n", " return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))\n", "\n", "# Generate mixed colors for the combinations of checkpoints\n", "mixed_colors = [\n", " mix_colors(base_colors[0], base_colors[1]), # Mix B7H4 and PDL1\n", "# mix_colors(base_colors[0], base_colors[2]), # Mix B7H4 and PD1\n", "# mix_colors(base_colors[1], base_colors[2]), # Mix PDL1 and PD1\n", " tuple(np.mean(base_colors, axis=0)) # Mix B7H4, PDL1, and PD1\n", "]\n", "\n", "# Adding the color for 'None'\n", "#none_color = [(0.8, 0.8, 0.8)] # A shade of gray\n", "\n", "# Combine all colors into one list\n", "color_values = base_colors + mixed_colors #+ none_color\n", "\n", "# Display unique immune checkpoint combinations\n", "print(\"Unique immune checkpoint combinations are:\", immune_checkpoint)\n", "# Display the unique colors\n", "sb.palplot(color_values)\n" ] }, { "cell_type": "code", "execution_count": 70, "id": "ce2c7c21-f90b-4d06-a93f-418d9aae4304", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'B7H4': (0.9677975592919913, 0.44127456009157356, 0.5358103155058701),\n", " 'PDL1': (0.3126890019504329, 0.6928754610296064, 0.1923704830330379),\n", " 'PD1': (0.23299120924703914, 0.639586552066035, 0.9260706093977744),\n", " 'B7H4_PDL1': (0.6402432806212122, 0.56707501056059, 0.36409039926945397),\n", " 'None': (0.5044925901631545, 0.5912455243957383, 0.5514171359788941)}" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Store in a dctionnary\n", "immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))\n", "immunecheckpoint_color_dict" ] }, { "cell_type": "code", 
"execution_count": 71, "id": "2f38cc65-50f3-463d-be1b-9fc1f8c90aa1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/immunecheckpoint_color_data.csv was created!\n" ] } ], "source": [ "# Save color information (mapping and legend) to metadata directory\n", "# Create dataframe\n", "immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, \"immune_checkpoint\")\n", "immunecheckpoint_color_df.head()\n", "\n", "# Save to file in metadatadirectory\n", "filename = \"immunecheckpoint_color_data.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "immunecheckpoint_color_df.to_csv(filename, index = False)\n", "print(\"File \" + filename + \" was created!\")" ] }, { "cell_type": "code", "execution_count": 72, "id": "9e2ae256-bbc3-4e93-93bb-560ffa02fb71", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAK8AAACcCAYAAAAJfPt2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWC0lEQVR4nO3de1BTZ/oH8G+4qAQCkWtorQZEFHZhEYUWFKXgSmFRpI4s1YrQwa5YL2i7gru1UO14oSJabe0WHMqv7nbAadUsKriAuJi6IhSorSy6GbytsFTBUW5Vw/v7A3LGmIBJuB55PjPMcK7vm/DN4eSc85wjYIwxEMJDRsPdAUIMReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvGWi7wJKpRKPHj0ajL6QUcLU1BTGxsb9Xo/O4WWMobGxEffu3et3o4SIxWJIJBIIBAKD16FzeFXBtbe3h1Ao7FejZPRijKG9vR1NTU0AAEdHR4PXpVN4lUolF1wbGxuDGyMEAMzMzAAATU1NsLe3N3gXQqcvbKp9XKFQaFAjhDxNlaX+fH/S62gD7SqQgTIQWaJDZYS3KLyDJDAwEImJiYPahlQqxd69ewe1jS+//BJisXhQ2zDUgIQ3NjYWixYtGohVkRHm97//Pa5cuaLXMkPxwQUMOElBRhczMzPu6MBIMyi7DYGBgVi7di0SExMxfvx4ODg4IDMzE21tbYiLi4NIJIKLiwtOnTrFLVNaWgqBQIDCwkJMnz4dZmZmCAoKQlNTE06dOgU3NzdYWlpi6dKlaG9v55bT9q/Ty8sLqamp3LBAIEBWVhYiIyMhFAoxZcoUyGQytWV+/PFHhIaGwsLCAg4ODli+fDnu3LnT5+uUy+UIDAyEUCjE+PHjERISgpaWFm56V1cXNm3aBGtra0gkErU+AcC9e/cQHx8POzs7WFpaIigoCDU1NWrz/P3vf4ePjw/GjRsHW1tbREZG9tqfrKwsiMViFBcXc3+HNWvWYM2
aNbCysoKtrS22bNmCJ+8z09LSgpiYGIwfPx5CoRChoaG4evUqN/3p3YbU1FR4eXnhq6++glQqhZWVFaKjo/HgwQMA3f+Fz549i3379kEgEEAgEODatWt9vo+GGrR93pycHNja2qK8vBxr165FQkIClixZAn9/f3z//feYP38+li9frhZEoPvNOXDgAL777jvcvHkTUVFR2Lt3L/72t7/hxIkTOH36NPbv3693fz788ENERUXhhx9+QFhYGJYtW4bm5mYA3SEKCgrC9OnTUVFRgYKCAvzvf/9DVFRUr+urrq5GcHAw3N3dcf78eZw7dw4LFiyAUqlUew/Mzc1x4cIFpKWlYevWrfjHP/7BTV+yZAn34aysrIS3tzeCg4O5fp04cQKRkZEICwtDVVUViouL4evrq7U/aWlpSE5OxunTpxEcHKzWBxMTE5SXl2Pfvn3Ys2cPsrKyuOmxsbGoqKiATCbD+fPnwRhDWFhYn4ewFAoFjh07hvz8fOTn5+Ps2bPYuXMnAGDfvn3w8/PDypUr0dDQgIaGBrz00kt9/WkMx3TQ0dHBLl++zDo6OrROX7FiBYuIiOCG586dy2bPns0NP378mJmbm7Ply5dz4xoaGhgAdv78ecYYY2fOnGEAWFFRETfPjh07GACmUCi4cX/4wx9YSEgINzxp0iSWkZGh1p/f/OY3LCUlhRsGwN5//31uuLW1lQFgp06dYowxtm3bNjZ//ny1ddy8eZMBYHV1dVpf8xtvvMFmzZqldZq294Axxnx8fFhSUhJjjLGysjJmaWnJOjs71eaZPHky+8tf/sIYY8zPz48tW7as1zZUr33Tpk3M0dGR/fjjjxp9cHNzY11dXdy4pKQk5ubmxhhj7MqVKwwAk8vl3PQ7d+4wMzMzlpeXxxhjLDs7m1lZWXHTU1JSmFAoZPfv3+fG/fGPf2Qvv/yyWrvr16/vtd+MPTtTuhi0fV5PT0/ud2NjY9jY2MDDw4Mb5+DgAADcaUJtyzk4OEAoFMLZ2VltXHl5eb/6Y25uDktLS67tmpoanDlzBhYWFhrLKRQKuLq6aoyvrq7GkiVLdG4T6D4V+mSbra2tGmcsOzo6oFAouDZWrlzZZxvp6eloa2tDRUWF2vuk8sorr6gdU/Xz80N6ejqUSiVqa2thYmKCl19+mZtuY2ODqVOnora2ttc2pVIpRCKR1tc1lAYtvKampmrDAoFAbZzqDe3q6up1uaeXUY17chkjIyO1fThA+1mbvtbT2tqKBQsWYNeuXRrL9XbuXZcvMc9q09HREaWlpRrLqfYxdWkjICAAJ06cQF5eHpKTk585/0B41t9kqPD+OK+dnR0aGhq44fv376O+vl6vdXh7e+Onn36CVCqFi4uL2o+5ubnWZTw9PbkvRobw9vZGY2MjTExMNNq0tbXVuQ1fX1+cOnUK27dvx+7duzWmX7hwQW34X//6F6ZMmQJjY2O4ubnh8ePHavPcvXsXdXV1cHd3N/i1jRkzRm3ff7DwPrxBQUH46quvUFZWhkuXLmHFihV6X+jxzjvvoLm5GW+88QYuXrwIhUKBwsJCxMXF9fpH2Lx5My5evIjVq1fjhx9+wL///W8cPHjwmUcoVObNmwc/Pz8sWrQIp0+fxrVr1/Ddd9/hz3/+MyoqKgAAKSkp+Prrr5GSkoLa2lpcunRJ638Hf39/nDx5Eh9++KHGkZcbN25g48aNqKurw9dff439+/dj/fr1AIApU6YgIiICK1euxLlz51BTU4M333wTL774IiIiIvR4B9VJpVJcuHAB165dw507dwZtq8z78G7evBlz585FeHg4fve732HRokWYPHmyXut44YUXIJfLoVQqMX/+fHh4eCAxMRFisRhGRtrfIldXV5w+fRo1NTXw9fWFn58fjh8/DhMT3fbEBAIBTp48iTlz5iAuLg6urq6Ijo7G9evXue8DgYGBOHLkCGQyGby8vBAUFNTr/v7s2bNx4sQJvP/++2pHY2JiYtDR0QF
fX1+88847WL9+Pd5++21uenZ2NmbMmIHw8HD4+fmBMYaTJ09q7Bro47333oOxsTHc3d1hZ2eHGzduGLyuvgjY0zuMWnR2dqK+vh5OTk4YN27coHSEDLzAwEB4eXkN+ilkQwxEpni/5SWjF4WX8BZd2/Ac03YY7nlCW17CWxRewlsUXsJbFF7CWxRewlsUXsJbFF7CW0NynLdzY9pQNMMZt2eTXvPHxsYiJyeHG7a2toaPjw/S0tLg6emJ0tJSvPrqq1qXLS8vh4+PDzdPS0uLRrWtVCpFYmKiRlEi66laKCgowNGjR6mIVU+05e3x2muvcWUrxcXFMDExQXh4OIDuq7ZU01Q/8fHxcHJywsyZMw1uc+/evXQjl36gM2w9xo4dC4lEAgCQSCRITk5GQEAAfv75Z9jZ2XHTgO6L3Y8fP461a9caHL7q6mqkp6ejoqKiXzebG81oy6tFa2srDh8+DBcXF603FpTJZLh79y7i4uIMWn97ezuWLl2KTz/9VO1DQfRDW94e+fn5XA1bW1sbHB0dkZ+fr/V63kOHDiEkJAQTJkzQmKZt3NMV0hs2bIC/v3+/LvgmFF7Oq6++ioMHDwLovpfBZ599htDQUJSXl2PSpEncfLdu3UJhYSHy8vK0rqesrEytOBHovq5WRSaToaSkBFVVVQP/IkYZCm8Pc3NzuLi4cMNZWVmwsrJCZmYmPvroI258dnY2bGxssHDhQq3rcXJy0jja8GR1RUlJCRQKhcY8ixcvRkBAwHN/JdhAovD2QiAQwMjICB0dHdw4xhiys7MRExNjcJlMcnIy4uPj1cZ5eHggIyMDCxYs6FefRxsKb49ffvkFjY2NALp3Gw4cOMCVxKuUlJSgvr5eI3z6kEgkWr+kTZw4EU5OTgavdzSi8PYoKCjgDlmJRCJMmzYNR44cUdtfPXToEPz9/TFt2rRh6iV5EhVgkmFBBZhkVKPwEt6i8BLeovAS3qLwEt6i8BLeovAS3qLwEt6i8BLeovAS3hqSaxsiZd5D0Qzn6MLv9Zr/yQJMU1NTTJw4ETExMfjTn/6Ec+fOccWXAoEAIpEIzs7O+O1vf4sNGzaolfCkpqbi2LFjqK6u1trOt99+i88//xyVlZVobm5GVVUVvLy8DHqNhLa8HFUB5tWrV/Huu+8iNTUVH3/8MTe9rq4Ot2/fxsWLF5GUlISioiL8+te/xqVLl3Ruo62tDbNnz9Z6a36iP7qqrMeTBZgJCQk4evQoZDIZ/Pz8AAD29vYQi8WQSCRwdXVFREQEpk+fjoSEBJw7d06nNpYvXw4Ag/ZEyNGGtry9MDMzw8OHD/ucvmrVKsjl8mF5Bhmh8GpgjKGoqAiFhYUICgrqc17Vdb20JR0etNvQQ1U9/OjRI3R1dWHp0qVITU3FxYsXe11GdSk03ThkeFB4e6iqh8eMGYMXXnhBp0dSqR5xKpVKB7l3RBsKb4+nq4efpaOjA1988QXmzJkDOzu7QewZ6Q2FV0dNTU3o7OzEgwcPUFlZibS0NNy5cwfffvut2nwdHR0ax3lFIhEmT56M5uZm3LhxA7dv3wbQffgN6L0ok/RtSMKr70mDkWjq1KkQCASwsLCAs7Mz5s+fj40bN2qE7sqVK5g+fbrauODgYBQVFUEmk6ndIio6OhpA92NaU1NTB/01PG+oAJMMCyrAJKMahZfwFoWX8BaFl/AWhZfwFoWX8BaFl/AWhZfwFoWX8BaFl/DWkFzbMPvw3aFohnPuTc3HT/VloAowf/rpJ3zwwQeorKzE9evXkZGRofHUSzJwaMvbYyAKMNvb2+Hs7IydO3fSVWJDgMLbQ1WAOWnSJCQkJGDevHmQyWTcdHt7e674Mjo6GnK5HHZ2dkhISODm8fHxwccff4zo6GiMHTt2OF7GqELh7QUVYI58FN6nUAEmf1AlRQ8qwOQfCm8PKsDkHwpvDyrA5B8Kr450KcB8+PAhLl++zP3+3//+F9XV1bCwsNDrg0F0MyTh1fekwUi
kSwHm7du31Yovd+/ejd27d2Pu3Ln0QOxBQAWYZFhQASYZ1Si8hLcovIS3KLyEtyi8hLcovIS3KLyEtyi8hLcovIS3KLyEt4bk2obDn8QMRTOcN9f9n17zP1mACQDW1tbw8fFBWloaPD09UVpayhVhPq28vBw+Pj7cPC0tLRCLxWrzSKVSJCYmahRjMsYQFhaGgoICHD16FIsWLXpmX69duwYnJye1vs6YMQO7du3irqsIDAzE2bNnAQBjxoyBra0tvL29ERcXh9dff11tfQKBoNe2Ozs7sWrVKlRWVqK2thbh4eE4duzYM/s4VGjL20NVgNnQ0IDi4mKYmJggPDwcAODv789NU/3Ex8fDyckJM2fONLjNvXv3Gnwhe1FRERoaGlBYWIjW1laEhobi3r173PSVK1eioaEBCoUC33zzDdzd3REdHY23335b5zaUSiXMzMywbt06zJs3z6B+Dia6JLLHk0/AlEgkSE5ORkBAAH7++WfY2dmpXT326NEjHD9+HGvXrjU4fNXV1UhPT0dFRYVa+byubGxsuGdZ7N69G7NmzcKFCxcQEhICABAKhVyfJ0yYgFdeeQXTpk3DW2+9haioKJ3CaG5ujoMHDwIA5HK52odjJKAtrxatra04fPgwXFxcYGOjeTmnTCbD3bt31Z4voY/29nYsXboUn3766YCUyJuZmQFAnwWjALBixQqMHz9e4yEwfEVb3h6qGjag+wHXjo6OyM/Ph5GR5uf70KFDCAkJwYQJEzSmaRvX3t6uNrxhwwb4+/sjIiKi3/2+d+8etm3bBgsLC/j6+vY5r5GREVxdXZ+bglEKbw9VDRsAtLS04LPPPkNoaCjKy8sxadIkbr5bt26hsLAQeXl5WtdTVlYGkUikNi4wMJD7XSaToaSkBFVVVf3qr7+/P4yMjNDW1gZnZ2fk5ubCwcHhmcsxxp6bglEKb4+na9iysrJgZWWFzMxMfPTRR9z47Oxs2NjYYOHChVrX4+TkpHG04clizpKSEigUCo15Fi9ejICAAJ0rLnJzc+Hu7g4bGxuNdfVGqVTi6tWr8PHx0Wn+kY7C2wuBQAAjIyN0dHRw4xhjyM7ORkxMDExNTQ1ab3JyMuLj49XGeXh4ICMjAwsWLNB5PS+99BImT56sV9s5OTloaWnB4sWL9VpupKLw9vjll1/Q2NgIoHu34cCBA2htbVULVElJCerr6zXCp4/ennY5ceJEteO3/dXe3o7GxkY8fvwYt27dwtGjR5GRkYGEhASNY9b19fUaT+2cMmUKzM3NcfnyZTx8+BDNzc148OABN5+Xl9eA9dVQQxJefU8aDIeCggLukJVIJMK0adNw5MgRtf3VQ4cOwd/fn7tTzkiWmZmJzMxMjBkzBjY2NpgxYwZyc3MRGRmpMe/GjRs1xpWVlWH27NkICwvD9evXufGqEyE6lD4OOirAJMOCCjDJqEbhHWFWrVoFCwsLrT+rVq0a7u6NKLTbMMI0NTXh/v37WqdZWlrC3t5+iHs0OAYiU3S0YYSxt7d/bgI62Gi3gfAWhZfwFoWX8BaFl/AWhZfwFoWX8NaQHCrb+vmeoWiG88EqzXP1fVEVYO7YsQPJycnc+GPHjiEyMnJEnMcnmmjL22PcuHHYtWsXWlpahrsrREcU3h7z5s2DRCLBjh07ep3nm2++wa9+9SuMHTsWUqkU6enpatOlUim2b9+Ot956CyKRCBMnTsQXX3yhNs/NmzcRFRUFsVgMa2trREREPDdlOUONwtvD2NgY27dvx/79+3Hr1i2N6ZWVlYiKikJ0dDQuXbqE1NRUbNmyBV9++aXafOnp6Zg5cyaqqqqwevVqJCQkoK6uDkB31XFISAhEIhHKysogl8thYWGB11577ZnFk0QThfcJkZGR8PLyQkpKisa0PXv2IDg4GFu2bIGrqytiY2OxZs0atYdrA0BYWBhWr14NFxcXJCUlwdbWFmfOnAHQXbrT1dWFrKwseHh4wM3NDdnZ2bhx4wY9cMU
AFN6n7Nq1Czk5OdwDAlVqa2sxa9YstXGzZs3C1atXoVQquXGenp7c7wKBABKJhHs2cU1NDf7zn/9AJBJxV4pZW1ujs7MTCoViEF/V84kuzHnKnDlzEBISgs2bNyM2Nlbv5Z+ubRMIBOjq6gLQfT+IGTNm4K9//avGcvQgQv1ReLXYuXMnvLy8MHXqVG6cm5sb5HK52nxyuRyurq4wNjbWab3e3t7Izc2Fvb09LC0tB7TPoxHtNmjh4eGBZcuW4ZNPPuHGvfvuuyguLsa2bdtw5coV5OTk4MCBA3jvvfd0Xu+yZctga2uLiIgIlJWVob6+HqWlpVi3bp3WL4mkb0Oy5dX3pMFIsHXrVuTm5nLD3t7eyMvLwwcffIBt27bB0dERW7du1WvXQigU4p///CeSkpLw+uuv48GDB3jxxRcRHBxMW2IDUCUFGRZUgElGNQov4S0KL+EtCi/hLb3CS5cGkoEyEFnSKbyqs0ZP3ySZEEOpsmTo3TYBHY/zGhsbQywWc+fohULhc3ODYjK0GGNob29HU1MTxGKxzmcntdHpOK+q0cbGxhH3UA3CT2KxGBKJpF8bQZ3Dq6JUKvHo0SODGyTE1NS0X1tcFb3DS8hIQYfKCG9ReAlvUXgJb1F4CW9ReAlvUXgJb1F4CW/9P8n17cNbMm37AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Legend of cell type info only\n", "g = plt.figure(figsize = (1,1)).add_subplot(111)\n", "g.axis('off')\n", "handles = []\n", "for item in immunecheckpoint_color_dict.keys():\n", " h = g.bar(0,0, color = immunecheckpoint_color_dict[item],\n", " label = item, linewidth =0)\n", " handles.append(h)\n", "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Immune checkpoint'),\n", "\n", "\n", "filename = \"Cellsubtype_legend.png\"\n", "filename = os.path.join(metadata_images_dir, filename)\n", "plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "markdown", "id": "92332958-9ac4-4415-81b3-b3ae27354da3", "metadata": {}, "source": [ "## II.7. BACKGROUND SUBSTRACTION" ] }, { "cell_type": "code", "execution_count": 73, "id": "30fb1c46-e5f9-4a8e-91eb-010c878a8785", "metadata": {}, "outputs": [], "source": [ "# Do background subtraction\n", "# this uses a df (metadata) outside of \n", "# the scope of the lambda...\n", "# careful that this might break inside of a script...\n", "df.loc[:,~df.columns.isin(not_intensities)] = \\\n", " df.loc[:,~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis = 0)" ] }, { "cell_type": "code", "execution_count": 74, "id": "b47e45ac-deed-447b-b630-1ccdaa85d195", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nuc_XSample_IDNucleus_RoundnessNuc_Y_InvROI_indexCell_SizeNucleus_SizeAF488_Cell_Intensity_AverageAF488_Cytoplasm_Intensity_AverageAF488_Nucleus_Intensity_Average...r8c2_Nucleus_Intensity_AverageSting_Cell_Intensity_AverageSting_Cytoplasm_Intensity_AverageSting_Nucleus_Intensity_AverageVimentin_Cell_Intensity_AverageVimentin_Cytoplasm_Intensity_AverageVimentin_Nucleus_Intensity_Averagecell_typecell_subtypeimmune_checkpoint
ID
DD3S3_Cell_0823.567871DD3S3.csv0.83532415699.3828120281810.00.00.0...4.1995060.6634070.6367380.72925512.42718910.15173318.045597nonenonenone
DD3S3_Cell_1852.840027DD3S3.csv0.52342115690.5332030200750.00.00.0...4.2156891.1073921.2154370.92731611.63956612.7363489.811600nonenonenone
DD3S3_Cell_2868.272705DD3S3.csv0.68614715682.99414104251650.00.00.0...4.5235761.1515641.3707960.8061073.4867684.7770261.453637nonenonenone
DD3S3_Cell_4704.337280DD3S3.csv0.75762315683.05957004181690.00.00.0...4.5411040.5163230.4599960.5993141.2472571.1281661.422721nonenonenone
DD3S3_Cell_5852.893799DD3S3.csv0.71465115683.01757802011130.00.00.0...4.4564600.5912220.4423030.7071951.8310611.0766292.418584nonenonenone
..................................................................
DD3S2_Cell_9124111890.440430DD3S2.csv0.6543411231.69995145185500.00.00.0...3.3494001.2669641.4225600.846853-0.221802-0.247975-0.151134nonenonenone
DD3S2_Cell_9124311915.123047DD3S2.csv0.8542571228.22216845203810.00.00.0...3.6908241.5067361.5521941.4382680.039343-0.0813390.221112nonenonenone
DD3S2_Cell_9124411961.339844DD3S2.csv0.6515631230.93994145157500.00.00.0...3.2676671.2520321.1629811.4426000.0267720.059065-0.042333nonenonenone
DD3S2_Cell_9124511969.869141DD3S2.csv0.7645021230.71740745115460.00.00.0...3.5223191.5088611.6577591.2855141.4840282.1595650.470724nonenonenone
DD3S2_Cell_9124711965.208984DD3S2.csv0.8730941224.17907745146670.00.00.0...3.5790051.3672901.3870041.3440456.27840211.4978900.124080nonenonenone
\n", "

350554 rows × 118 columns

\n", "
" ], "text/plain": [ " Nuc_X Sample_ID Nucleus_Roundness Nuc_Y_Inv \\\n", "ID \n", "DD3S3_Cell_0 823.567871 DD3S3.csv 0.835324 15699.382812 \n", "DD3S3_Cell_1 852.840027 DD3S3.csv 0.523421 15690.533203 \n", "DD3S3_Cell_2 868.272705 DD3S3.csv 0.686147 15682.994141 \n", "DD3S3_Cell_4 704.337280 DD3S3.csv 0.757623 15683.059570 \n", "DD3S3_Cell_5 852.893799 DD3S3.csv 0.714651 15683.017578 \n", "... ... ... ... ... \n", "DD3S2_Cell_91241 11890.440430 DD3S2.csv 0.654341 1231.699951 \n", "DD3S2_Cell_91243 11915.123047 DD3S2.csv 0.854257 1228.222168 \n", "DD3S2_Cell_91244 11961.339844 DD3S2.csv 0.651563 1230.939941 \n", "DD3S2_Cell_91245 11969.869141 DD3S2.csv 0.764502 1230.717407 \n", "DD3S2_Cell_91247 11965.208984 DD3S2.csv 0.873094 1224.179077 \n", "\n", " ROI_index Cell_Size Nucleus_Size \\\n", "ID \n", "DD3S3_Cell_0 0 281 81 \n", "DD3S3_Cell_1 0 200 75 \n", "DD3S3_Cell_2 0 425 165 \n", "DD3S3_Cell_4 0 418 169 \n", "DD3S3_Cell_5 0 201 113 \n", "... ... ... ... \n", "DD3S2_Cell_91241 45 185 50 \n", "DD3S2_Cell_91243 45 203 81 \n", "DD3S2_Cell_91244 45 157 50 \n", "DD3S2_Cell_91245 45 115 46 \n", "DD3S2_Cell_91247 45 146 67 \n", "\n", " AF488_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 0.0 \n", "DD3S3_Cell_1 0.0 \n", "DD3S3_Cell_2 0.0 \n", "DD3S3_Cell_4 0.0 \n", "DD3S3_Cell_5 0.0 \n", "... ... \n", "DD3S2_Cell_91241 0.0 \n", "DD3S2_Cell_91243 0.0 \n", "DD3S2_Cell_91244 0.0 \n", "DD3S2_Cell_91245 0.0 \n", "DD3S2_Cell_91247 0.0 \n", "\n", " AF488_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 0.0 \n", "DD3S3_Cell_1 0.0 \n", "DD3S3_Cell_2 0.0 \n", "DD3S3_Cell_4 0.0 \n", "DD3S3_Cell_5 0.0 \n", "... ... \n", "DD3S2_Cell_91241 0.0 \n", "DD3S2_Cell_91243 0.0 \n", "DD3S2_Cell_91244 0.0 \n", "DD3S2_Cell_91245 0.0 \n", "DD3S2_Cell_91247 0.0 \n", "\n", " AF488_Nucleus_Intensity_Average ... \\\n", "ID ... \n", "DD3S3_Cell_0 0.0 ... \n", "DD3S3_Cell_1 0.0 ... \n", "DD3S3_Cell_2 0.0 ... \n", "DD3S3_Cell_4 0.0 ... \n", "DD3S3_Cell_5 0.0 ... \n", "... ... ... 
\n", "DD3S2_Cell_91241 0.0 ... \n", "DD3S2_Cell_91243 0.0 ... \n", "DD3S2_Cell_91244 0.0 ... \n", "DD3S2_Cell_91245 0.0 ... \n", "DD3S2_Cell_91247 0.0 ... \n", "\n", " r8c2_Nucleus_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 4.199506 \n", "DD3S3_Cell_1 4.215689 \n", "DD3S3_Cell_2 4.523576 \n", "DD3S3_Cell_4 4.541104 \n", "DD3S3_Cell_5 4.456460 \n", "... ... \n", "DD3S2_Cell_91241 3.349400 \n", "DD3S2_Cell_91243 3.690824 \n", "DD3S2_Cell_91244 3.267667 \n", "DD3S2_Cell_91245 3.522319 \n", "DD3S2_Cell_91247 3.579005 \n", "\n", " Sting_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 0.663407 \n", "DD3S3_Cell_1 1.107392 \n", "DD3S3_Cell_2 1.151564 \n", "DD3S3_Cell_4 0.516323 \n", "DD3S3_Cell_5 0.591222 \n", "... ... \n", "DD3S2_Cell_91241 1.266964 \n", "DD3S2_Cell_91243 1.506736 \n", "DD3S2_Cell_91244 1.252032 \n", "DD3S2_Cell_91245 1.508861 \n", "DD3S2_Cell_91247 1.367290 \n", "\n", " Sting_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 0.636738 \n", "DD3S3_Cell_1 1.215437 \n", "DD3S3_Cell_2 1.370796 \n", "DD3S3_Cell_4 0.459996 \n", "DD3S3_Cell_5 0.442303 \n", "... ... \n", "DD3S2_Cell_91241 1.422560 \n", "DD3S2_Cell_91243 1.552194 \n", "DD3S2_Cell_91244 1.162981 \n", "DD3S2_Cell_91245 1.657759 \n", "DD3S2_Cell_91247 1.387004 \n", "\n", " Sting_Nucleus_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 0.729255 \n", "DD3S3_Cell_1 0.927316 \n", "DD3S3_Cell_2 0.806107 \n", "DD3S3_Cell_4 0.599314 \n", "DD3S3_Cell_5 0.707195 \n", "... ... \n", "DD3S2_Cell_91241 0.846853 \n", "DD3S2_Cell_91243 1.438268 \n", "DD3S2_Cell_91244 1.442600 \n", "DD3S2_Cell_91245 1.285514 \n", "DD3S2_Cell_91247 1.344045 \n", "\n", " Vimentin_Cell_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 12.427189 \n", "DD3S3_Cell_1 11.639566 \n", "DD3S3_Cell_2 3.486768 \n", "DD3S3_Cell_4 1.247257 \n", "DD3S3_Cell_5 1.831061 \n", "... ... 
\n", "DD3S2_Cell_91241 -0.221802 \n", "DD3S2_Cell_91243 0.039343 \n", "DD3S2_Cell_91244 0.026772 \n", "DD3S2_Cell_91245 1.484028 \n", "DD3S2_Cell_91247 6.278402 \n", "\n", " Vimentin_Cytoplasm_Intensity_Average \\\n", "ID \n", "DD3S3_Cell_0 10.151733 \n", "DD3S3_Cell_1 12.736348 \n", "DD3S3_Cell_2 4.777026 \n", "DD3S3_Cell_4 1.128166 \n", "DD3S3_Cell_5 1.076629 \n", "... ... \n", "DD3S2_Cell_91241 -0.247975 \n", "DD3S2_Cell_91243 -0.081339 \n", "DD3S2_Cell_91244 0.059065 \n", "DD3S2_Cell_91245 2.159565 \n", "DD3S2_Cell_91247 11.497890 \n", "\n", " Vimentin_Nucleus_Intensity_Average cell_type cell_subtype \\\n", "ID \n", "DD3S3_Cell_0 18.045597 none none \n", "DD3S3_Cell_1 9.811600 none none \n", "DD3S3_Cell_2 1.453637 none none \n", "DD3S3_Cell_4 1.422721 none none \n", "DD3S3_Cell_5 2.418584 none none \n", "... ... ... ... \n", "DD3S2_Cell_91241 -0.151134 none none \n", "DD3S2_Cell_91243 0.221112 none none \n", "DD3S2_Cell_91244 -0.042333 none none \n", "DD3S2_Cell_91245 0.470724 none none \n", "DD3S2_Cell_91247 0.124080 none none \n", "\n", " immune_checkpoint \n", "ID \n", "DD3S3_Cell_0 none \n", "DD3S3_Cell_1 none \n", "DD3S3_Cell_2 none \n", "DD3S3_Cell_4 none \n", "DD3S3_Cell_5 none \n", "... ... 
\n", "DD3S2_Cell_91241 none \n", "DD3S2_Cell_91243 none \n", "DD3S2_Cell_91244 none \n", "DD3S2_Cell_91245 none \n", "DD3S2_Cell_91247 none \n", "\n", "[350554 rows x 118 columns]" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 75, "id": "729f24de-6494-4eae-91d5-d3eb399f1e56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Nuc_X' 'Sample_ID' 'Nucleus_Roundness' 'Nuc_Y_Inv' 'ROI_index'\n", " 'Cell_Size' 'Nucleus_Size' 'aSMA_Cell_Intensity_Average'\n", " 'aSMA_Cytoplasm_Intensity_Average' 'aSMA_Nucleus_Intensity_Average'\n", " 'AXL_Cell_Intensity_Average' 'AXL_Cytoplasm_Intensity_Average'\n", " 'AXL_Nucleus_Intensity_Average' 'B7H4_Cell_Intensity_Average'\n", " 'B7H4_Cytoplasm_Intensity_Average' 'B7H4_Nucleus_Intensity_Average'\n", " 'CA9_Cell_Intensity_Average' 'CA9_Cytoplasm_Intensity_Average'\n", " 'CA9_Nucleus_Intensity_Average' 'CD4_Cell_Intensity_Average'\n", " 'CD4_Cytoplasm_Intensity_Average' 'CD4_Nucleus_Intensity_Average'\n", " 'CD8_Cell_Intensity_Average' 'CD8_Cytoplasm_Intensity_Average'\n", " 'CD8_Nucleus_Intensity_Average' 'CD11b_Cell_Intensity_Average'\n", " 'CD11b_Cytoplasm_Intensity_Average' 'CD11b_Nucleus_Intensity_Average'\n", " 'CD11c_Cell_Intensity_Average' 'CD11c_Cytoplasm_Intensity_Average'\n", " 'CD11c_Nucleus_Intensity_Average' 'CD20_Cell_Intensity_Average'\n", " 'CD20_Cytoplasm_Intensity_Average' 'CD20_Nucleus_Intensity_Average'\n", " 'CD31_Cell_Intensity_Average' 'CD31_Cytoplasm_Intensity_Average'\n", " 'CD31_Nucleus_Intensity_Average' 'CD44_Cell_Intensity_Average'\n", " 'CD44_Cytoplasm_Intensity_Average' 'CD44_Nucleus_Intensity_Average'\n", " 'CD45_Cell_Intensity_Average' 'CD45_Cytoplasm_Intensity_Average'\n", " 'CD45_Nucleus_Intensity_Average' 'CD68_Cell_Intensity_Average'\n", " 'CD68_Cytoplasm_Intensity_Average' 'CD68_Nucleus_Intensity_Average'\n", " 'CD163_Cell_Intensity_Average' 
'CD163_Cytoplasm_Intensity_Average'\n", " 'CD163_Nucleus_Intensity_Average' 'CKs_Cell_Intensity_Average'\n", " 'CKs_Cytoplasm_Intensity_Average' 'CKs_Nucleus_Intensity_Average'\n", " 'ColVI_Cell_Intensity_Average' 'ColVI_Cytoplasm_Intensity_Average'\n", " 'ColVI_Nucleus_Intensity_Average' 'Desmin_Cell_Intensity_Average'\n", " 'Desmin_Cytoplasm_Intensity_Average' 'Desmin_Nucleus_Intensity_Average'\n", " 'Ecad_Cell_Intensity_Average' 'Ecad_Cytoplasm_Intensity_Average'\n", " 'Ecad_Nucleus_Intensity_Average' 'Fibronectin_Cell_Intensity_Average'\n", " 'Fibronectin_Cytoplasm_Intensity_Average'\n", " 'Fibronectin_Nucleus_Intensity_Average' 'FOXP3_Cell_Intensity_Average'\n", " 'FOXP3_Cytoplasm_Intensity_Average' 'FOXP3_Nucleus_Intensity_Average'\n", " 'GATA3_Cell_Intensity_Average' 'GATA3_Cytoplasm_Intensity_Average'\n", " 'GATA3_Nucleus_Intensity_Average' 'HLA_Cell_Intensity_Average'\n", " 'HLA_Cytoplasm_Intensity_Average' 'HLA_Nucleus_Intensity_Average'\n", " 'Ki67_Cell_Intensity_Average' 'Ki67_Cytoplasm_Intensity_Average'\n", " 'Ki67_Nucleus_Intensity_Average' 'MMP9_Cell_Intensity_Average'\n", " 'MMP9_Cytoplasm_Intensity_Average' 'MMP9_Nucleus_Intensity_Average'\n", " 'PD1_Cell_Intensity_Average' 'PD1_Cytoplasm_Intensity_Average'\n", " 'PD1_Nucleus_Intensity_Average' 'PDGFR_Cell_Intensity_Average'\n", " 'PDGFR_Cytoplasm_Intensity_Average' 'PDGFR_Nucleus_Intensity_Average'\n", " 'PDL1_Cell_Intensity_Average' 'PDL1_Cytoplasm_Intensity_Average'\n", " 'PDL1_Nucleus_Intensity_Average' 'r5c2_Cell_Intensity_Average'\n", " 'r5c2_Cytoplasm_Intensity_Average' 'r5c2_Nucleus_Intensity_Average'\n", " 'r7c2_Cell_Intensity_Average' 'r7c2_Cytoplasm_Intensity_Average'\n", " 'r7c2_Nucleus_Intensity_Average' 'r8c2_Cell_Intensity_Average'\n", " 'r8c2_Cytoplasm_Intensity_Average' 'r8c2_Nucleus_Intensity_Average'\n", " 'Sting_Cell_Intensity_Average' 'Sting_Cytoplasm_Intensity_Average'\n", " 'Sting_Nucleus_Intensity_Average' 'Vimentin_Cell_Intensity_Average'\n", " 
'Vimentin_Cytoplasm_Intensity_Average'\n", " 'Vimentin_Nucleus_Intensity_Average' 'cell_type' 'cell_subtype'\n", " 'immune_checkpoint']\n" ] } ], "source": [ "# Drop AF columns\n", "df = df.filter(regex='^(?!AF\\d{3}).*')\n", "print(df.columns.values)" ] }, { "cell_type": "markdown", "id": "5c23ca0a-2d0e-4f30-b358-54acc69ac3d0", "metadata": {}, "source": [ "## II.8. SAVE" ] }, { "cell_type": "code", "execution_count": 76, "id": "8c0c8c62-9b55-451f-8c2f-bdc9a33b9fff", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S3_bs.csv was created!\n", "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/TMA_bs.csv was created!\n", "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S1_bs.csv was created!\n", "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S2_bs.csv was created!\n" ] } ], "source": [ "# Save the data by Sample_ID\n", "# Check for the existence of the output file first\n", "for sample in ls_samples:\n", " sample_id = sample.split('_')[0]\n", " filename = os.path.join(output_data_dir, sample_id + \"_\" + step_suffix + \".csv\")\n", " if os.path.exists(filename):\n", " print(\"File by name \"+filename+\" already exists.\")\n", " else:\n", " sample_id_csv = sample_id + '.csv'\n", " df_save = df.loc[df['Sample_ID'] == sample_id_csv, :]\n", " #print(df_save)\n", " filename = os.path.join(output_data_dir, sample_id + \"_\" + step_suffix + \".csv\")\n", " df_save.to_csv(filename, index=True, index_label='ID') # Set index parameter to True to retain the index column\n", " print(\"File \" + filename + \" was created!\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8ef5b66d-fcc6-4677-aab7-f5a748196295", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": 
"5a2991f1-7ef3-430e-82f1-810ad70e769d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }