{ "cells": [ { "cell_type": "markdown", "id": "5fad9226-7ffa-41fc-a6d8-55dcdd1c9eb5", "metadata": {}, "source": [ "# CyCIF PYTHON PIPELINE" ] }, { "cell_type": "raw", "id": "8337a11b-813a-4abd-8a5e-45a1dcde430f", "metadata": {}, "source": [ "This sequence of Jupyter Notebooks is designed to run the CyCIF analysis after the ASHLAR registration step.\n", "A '*' in a section name (see the table of contents on your left) means the user has to provide an input in that section.\n", "The pipeline is composed of 5 Notebooks (see README file)." ] }, { "cell_type": "markdown", "id": "87b047ee-225e-415d-a0ae-17f30c0d5f35", "metadata": {}, "source": [ "# I. QC/EDA NOTEBOOK" ] }, { "cell_type": "raw", "id": "fbb817d5-aee1-447a-a8dd-786bbd3cc381", "metadata": {}, "source": [ "10/01/24\n", "Modifications by Zoé Gerber\n", "based on original code by Marilyne Labrie" ] }, { "cell_type": "raw", "id": "a48dc9ff-3dd1-4f86-bbb5-68bc6254b787", "metadata": {}, "source": [ "I.1. PACKAGES IMPORT\n", "I.2. DIRECTORIES\n", "I.3. FILES\n", " I.3.1. DATA\n", " I.3.2. NOT_INTENSITIES\n", "I.4. QC CHECKS\n", "I.5. COLUMNS OF INTERESTS\n", "I.6. EXPOSURE TIME\n", "I.7. COLORS WORKFLOW\n", " I.7.1. CHANNELS COLORS\n", " I.7.2. ROUNDS COLORS\n", " I.7.3. SAMPLES COLORS\n", " I.7.4. CLUSTERS COLORS\n", "I.8. SAVE" ] }, { "cell_type": "markdown", "id": "46755cfd-7896-4ca0-ba50-6792612e7650", "metadata": {}, "source": [ "## I.1. 
PACKAGES IMPORT" ] }, { "cell_type": "code", "execution_count": 4, "id": "d52f4de4-6db2-4590-9574-326ccf5bc97e", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "import os\n", "import plotly as plt\n", "import seaborn as sb\n", "\n", "from my_modules import *" ] }, { "cell_type": "code", "execution_count": 5, "id": "da8aaae9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: panel in /opt/anaconda3/lib/python3.11/site-packages (1.3.8)\n", "Requirement already satisfied: watchfiles in /opt/anaconda3/lib/python3.11/site-packages (0.21.0)\n", "Requirement already satisfied: bokeh<3.4.0,>=3.2.0 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (3.3.4)\n", "Requirement already satisfied: param<3.0,>=2.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.0.2)\n", "Requirement already satisfied: pyviz-comms>=2.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.3.0)\n", "Requirement already satisfied: xyzservices>=2021.09.1 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2022.9.0)\n", "Requirement already satisfied: markdown in /opt/anaconda3/lib/python3.11/site-packages (from panel) (3.4.1)\n", "Requirement already satisfied: markdown-it-py in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.2.0)\n", "Requirement already satisfied: linkify-it-py in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.0.0)\n", "Requirement already satisfied: mdit-py-plugins in /opt/anaconda3/lib/python3.11/site-packages (from panel) (0.3.0)\n", "Requirement already satisfied: requests in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.48.0 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (4.65.0)\n", "Requirement already satisfied: bleach in /opt/anaconda3/lib/python3.11/site-packages (from panel) (4.1.0)\n", "Requirement already satisfied: typing-extensions in 
/opt/anaconda3/lib/python3.11/site-packages (from panel) (4.9.0)\n", "Requirement already satisfied: pandas>=1.2 in /opt/anaconda3/lib/python3.11/site-packages (from panel) (2.1.4)\n", "Requirement already satisfied: anyio>=3.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from watchfiles) (4.2.0)\n", "Requirement already satisfied: idna>=2.8 in /opt/anaconda3/lib/python3.11/site-packages (from anyio>=3.0.0->watchfiles) (3.4)\n", "Requirement already satisfied: sniffio>=1.1 in /opt/anaconda3/lib/python3.11/site-packages (from anyio>=3.0.0->watchfiles) (1.3.0)\n", "Requirement already satisfied: Jinja2>=2.9 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (3.1.3)\n", "Requirement already satisfied: contourpy>=1 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (1.2.0)\n", "Requirement already satisfied: numpy>=1.16 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (1.26.4)\n", "Requirement already satisfied: packaging>=16.8 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (23.1)\n", "Requirement already satisfied: pillow>=7.1.0 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (10.2.0)\n", "Requirement already satisfied: PyYAML>=3.10 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (6.0.1)\n", "Requirement already satisfied: tornado>=5.1 in /opt/anaconda3/lib/python3.11/site-packages (from bokeh<3.4.0,>=3.2.0->panel) (6.3.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.11/site-packages (from pandas>=1.2->panel) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.11/site-packages (from pandas>=1.2->panel) (2023.3.post1)\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/anaconda3/lib/python3.11/site-packages (from pandas>=1.2->panel) (2023.3)\n", "Requirement already satisfied: six>=1.9.0 in 
/opt/anaconda3/lib/python3.11/site-packages (from bleach->panel) (1.16.0)\n", "Requirement already satisfied: webencodings in /opt/anaconda3/lib/python3.11/site-packages (from bleach->panel) (0.5.1)\n", "Requirement already satisfied: uc-micro-py in /opt/anaconda3/lib/python3.11/site-packages (from linkify-it-py->panel) (1.0.1)\n", "Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/lib/python3.11/site-packages (from markdown-it-py->panel) (0.1.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.11/site-packages (from requests->panel) (2.0.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/lib/python3.11/site-packages (from requests->panel) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/lib/python3.11/site-packages (from requests->panel) (2024.2.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/lib/python3.11/site-packages (from Jinja2>=2.9->bokeh<3.4.0,>=3.2.0->panel) (2.1.3)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install panel watchfiles" ] }, { "cell_type": "code", "execution_count": 6, "id": "efdf3910", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pygments in /opt/anaconda3/lib/python3.11/site-packages (2.15.1)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install pygments" ] }, { "cell_type": "code", "execution_count": 7, "id": "1e483a15", "metadata": {}, "outputs": [], "source": [ "import plotly.express as px\n", "import panel as pn\n", "import holoviews as hv\n", "import hvplot.pandas\n", "from my_modules import apply_header_changes, apply_df_changes, compare_headers, add_metadata_location, shorten_feature_names" ] }, { "cell_type": "code", "execution_count": 8, "id": "9936cc24-34f7-41b5-b184-94a8f2237b05", "metadata": { "tags": [] }, "outputs": [], 
"source": [ "#Silence FutureWarnings & UserWarnings\n", "warnings.filterwarnings('ignore', category= FutureWarning)\n", "warnings.filterwarnings('ignore', category= UserWarning)" ] }, { "cell_type": "code", "execution_count": 9, "id": "73290289", "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", " var py_version = '3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", " var reloading = true;\n", " var Bokeh = root.Bokeh;\n", "\n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks;\n", " }\n", " console.debug(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", " if (js_modules == null) js_modules = [];\n", " if (js_exports == null) js_exports = {};\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", "\n", " if (root._bokeh_is_loading > 0) {\n", " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " if (!reloading) {\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " }\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", 
" }\n", " }\n", " window._bokeh_on_load = on_load\n", "\n", " function on_error() {\n", " console.error(\"failed to load \" + url);\n", " }\n", "\n", " var skip = [];\n", " if (window.requirejs) {\n", " window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n", " require([\"jspanel\"], function(jsPanel) {\n", "\twindow.jsPanel = jsPanel\n", "\ton_load()\n", " })\n", " require([\"jspanel-modal\"], function() {\n", "\ton_load()\n", " })\n", " require([\"jspanel-tooltip\"], function() {\n", "\ton_load()\n", " })\n", " require([\"jspanel-hint\"], function() {\n", "\ton_load()\n", " })\n", " require([\"jspanel-layout\"], function() {\n", "\ton_load()\n", " })\n", " require([\"jspanel-contextmenu\"], function() {\n", "\ton_load()\n", " })\n", " require([\"jspanel-dock\"], function() {\n", "\ton_load()\n", " })\n", " require([\"gridstack\"], function(GridStack) {\n", "\twindow.GridStack = GridStack\n", "\ton_load()\n", " })\n", " require([\"notyf\"], function() {\n", "\ton_load()\n", " })\n", " root._bokeh_is_loading = css_urls.length + 9;\n", " } else {\n", " 
root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", " }\n", "\n", " var existing_stylesheets = []\n", " var links = document.getElementsByTagName('link')\n", " for (var i = 0; i < links.length; i++) {\n", " var link = links[i]\n", " if (link.href != null) {\n", "\texisting_stylesheets.push(link.href)\n", " }\n", " }\n", " for (var i = 0; i < css_urls.length; i++) {\n", " var url = css_urls[i];\n", " if (existing_stylesheets.indexOf(url) !== -1) {\n", "\ton_load()\n", "\tcontinue;\n", " }\n", " const element = document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.rel = \"stylesheet\";\n", " element.type = \"text/css\";\n", " element.href = url;\n", " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n", " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n", " for (var i = 0; i < urls.length; i++) {\n", " skip.push(urls[i])\n", " }\n", " } if (((window['GridStack'] !== undefined) && 
(!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n", " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n", " for (var i = 0; i < urls.length; i++) {\n", " skip.push(urls[i])\n", " }\n", " } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n", " var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n", " for (var i = 0; i < urls.length; i++) {\n", " skip.push(urls[i])\n", " }\n", " } var existing_scripts = []\n", " var scripts = document.getElementsByTagName('script')\n", " for (var i = 0; i < scripts.length; i++) {\n", " var script = scripts[i]\n", " if (script.src != null) {\n", "\texisting_scripts.push(script.src)\n", " }\n", " }\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (var i = 0; i < js_modules.length; i++) {\n", " var url = js_modules[i];\n", " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (const name in 
js_exports) {\n", " var url = js_exports[name];\n", " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " element.textContent = `\n", " import ${name} from \"${url}\"\n", " window.${name} = ${name}\n", " window._bokeh_on_load()\n", " `\n", " document.head.appendChild(element);\n", " }\n", " if (!js_urls.length && !js_modules.length) {\n", " on_load()\n", " }\n", " };\n", "\n", " function inject_raw_css(css) {\n", " const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " var js_urls = [];\n", " var js_modules = [];\n", " var js_exports = {};\n", " var css_urls = [];\n", " var inline_js = [ function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", "function(Bokeh) {} // ensure no trailing comma for IE\n", " ];\n", "\n", " function run_inline_js() {\n", " if ((root.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", "\ttry {\n", " inline_js[i].call(root, root.Bokeh);\n", "\t} catch(e) {\n", "\t if (!reloading) {\n", "\t throw e;\n", "\t }\n", "\t}\n", " }\n", " // Cache old bokeh versions\n", " if (Bokeh != undefined && !reloading) {\n", "\tvar NewBokeh = root.Bokeh;\n", "\tif (Bokeh.versions === undefined) {\n", "\t Bokeh.versions = new Map();\n", "\t}\n", "\tif (NewBokeh.version !== Bokeh.version) {\n", "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", "\t}\n", "\troot.Bokeh = Bokeh;\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within 
specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " }\n", " root._bokeh_is_initializing = false\n", " }\n", "\n", " function load_or_wait() {\n", " // Implement a backoff loop that tries to ensure we do not load multiple\n", " // versions of Bokeh and its dependencies at the same time.\n", " // In recent versions we use the root._bokeh_is_initializing flag\n", " // to determine whether there is an ongoing attempt to initialize\n", " // bokeh, however for backward compatibility we also try to ensure\n", " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", " // before older versions are fully initialized.\n", " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", " root._bokeh_is_initializing = false;\n", " root._bokeh_onload_callbacks = undefined;\n", " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", " load_or_wait();\n", " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", " setTimeout(load_or_wait, 100);\n", " } else {\n", " root._bokeh_is_initializing = true\n", " root._bokeh_onload_callbacks = []\n", " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", " if (!reloading && !bokeh_loaded) {\n", "\troot.Bokeh = undefined;\n", " }\n", " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", "\trun_inline_js();\n", " });\n", " }\n", " }\n", " // Give older versions of the autoload script a head-start to ensure\n", " // they initialize before we start loading newer version.\n", " setTimeout(load_or_wait, 100)\n", "}(window));" ], "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = 
'3.3.4'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = true;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 
'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 9;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n 
document.body.appendChild(element);\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.3.8/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.3.8/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) 
!== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i 
< inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, 
js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", "}\n", "\n", "\n", " function JupyterCommManager() {\n", " }\n", "\n", " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " comm_manager.register_target(comm_id, function(comm) {\n", " comm.on_msg(msg_handler);\n", " });\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", " comm.onMsg = msg_handler;\n", " });\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", " var messages = comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " console.log(message)\n", " var content = {data: message.data, comm_id};\n", " var buffers = []\n", " for (var buffer of message.buffers || []) {\n", " buffers.push(new DataView(buffer))\n", " }\n", " var metadata = message.metadata || {};\n", " var msg = {content, buffers, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return messages.next().then(processIteratorResult);\n", " })\n", 
" }\n", " }\n", "\n", " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", " if (comm_id in window.PyViz.comms) {\n", " return window.PyViz.comms[comm_id];\n", " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", " if (msg_handler) {\n", " comm.on_msg(msg_handler);\n", " }\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", " comm.open();\n", " if (msg_handler) {\n", " comm.onMsg = msg_handler;\n", " }\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", " comm_promise.then((comm) => {\n", " window.PyViz.comms[comm_id] = comm;\n", " if (msg_handler) {\n", " var messages = comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " var content = {data: message.data};\n", " var metadata = message.metadata || {comm_id};\n", " var msg = {content, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return messages.next().then(processIteratorResult);\n", " }\n", " }) \n", " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", " return comm_promise.then((comm) => {\n", " comm.send(data, metadata, buffers, disposeOnDone);\n", " });\n", " };\n", " var comm = {\n", " send: sendClosure\n", " };\n", " }\n", " window.PyViz.comms[comm_id] = comm;\n", " return comm;\n", " }\n", " window.PyViz.comm_manager = new JupyterCommManager();\n", " \n", "\n", "\n", "var JS_MIME_TYPE = 'application/javascript';\n", "var HTML_MIME_TYPE = 'text/html';\n", "var EXEC_MIME_TYPE = 
'application/vnd.holoviews_exec.v0+json';\n", "var CLASS_NAME = 'output';\n", "\n", "/**\n", " * Render data to the DOM node\n", " */\n", "function render(props, node) {\n", " var div = document.createElement(\"div\");\n", " var script = document.createElement(\"script\");\n", " node.appendChild(div);\n", " node.appendChild(script);\n", "}\n", "\n", "/**\n", " * Handle when a new output is added\n", " */\n", "function handle_add_output(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", " if (id !== undefined) {\n", " var nchildren = toinsert.length;\n", " var html_node = toinsert[nchildren-1].children[0];\n", " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", " var scripts = [];\n", " var nodelist = html_node.querySelectorAll(\"script\");\n", " for (var i in nodelist) {\n", " if (nodelist.hasOwnProperty(i)) {\n", " scripts.push(nodelist[i])\n", " }\n", " }\n", "\n", " scripts.forEach( function (oldScript) {\n", " var newScript = document.createElement(\"script\");\n", " var attrs = [];\n", " var nodemap = oldScript.attributes;\n", " for (var j in nodemap) {\n", " if (nodemap.hasOwnProperty(j)) {\n", " attrs.push(nodemap[j])\n", " }\n", " }\n", " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", " oldScript.parentNode.replaceChild(newScript, oldScript);\n", " });\n", " if (JS_MIME_TYPE in output.data) {\n", " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", " }\n", " output_area._hv_plot_id = id;\n", " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", " window.PyViz.plot_index[id] = Bokeh.index[id];\n", " } else {\n", " 
window.PyViz.plot_index[id] = null;\n", " }\n", " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", "}\n", "\n", "/**\n", " * Handle when an output is cleared or removed\n", " */\n", "function handle_clear_output(event, handle) {\n", " var id = handle.cell.output_area._hv_plot_id;\n", " var server_id = handle.cell.output_area._bokeh_server_id;\n", " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", " if (server_id !== null) {\n", " comm.send({event_type: 'server_delete', 'id': server_id});\n", " return;\n", " } else if (comm !== null) {\n", " comm.send({event_type: 'delete', 'id': id});\n", " }\n", " delete PyViz.plot_index[id];\n", " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", " var doc = window.Bokeh.index[id].model.document\n", " doc.clear();\n", " const i = window.Bokeh.documents.indexOf(doc);\n", " if (i > -1) {\n", " window.Bokeh.documents.splice(i, 1);\n", " }\n", " }\n", "}\n", "\n", "/**\n", " * Handle kernel restart event\n", " */\n", "function handle_kernel_cleanup(event, handle) {\n", " delete PyViz.comms[\"hv-extension-comm\"];\n", " window.PyViz.plot_index = {}\n", "}\n", "\n", "/**\n", " * Handle update_display_data messages\n", " */\n", "function handle_update_output(event, handle) {\n", " handle_clear_output(event, {cell: {output_area: 
handle.output_area}})\n", " handle_add_output(event, handle)\n", "}\n", "\n", "function register_renderer(events, OutputArea) {\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[0]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " events.on('output_added.OutputArea', handle_add_output);\n", " events.on('output_updated.OutputArea', handle_update_output);\n", " events.on('clear_output.CodeCell', handle_clear_output);\n", " events.on('delete.Cell', handle_clear_output);\n", " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", "\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " safe: true,\n", " index: 0\n", " });\n", "}\n", "\n", "if (window.Jupyter !== undefined) {\n", " try {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " } catch(err) {\n", " }\n", "}\n" ], "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } 
else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || 
{comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n 
newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n 
window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ] }, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "8e44b995-9262-4ec8-9602-2f43b99d97ac" } }, "output_type": "display_data" } ], "source": [ "# Initialize Panel extension\n", "pn.extension()" ] }, { "cell_type": "markdown", "id": "b179f303-c098-4c30-b4b9-df10db6c485a", "metadata": {}, "source": [ "## I.2. *DIRECTORIES" ] }, { "cell_type": "code", "execution_count": 10, "id": "849b1dd3-c940-4c7a-8cf6-2fd5b4dc43f4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431\n" ] } ], "source": [ "# Set base directory\n", "\n", "directorio_actual = os.getcwd()\n", "print(directorio_actual)\n", "\n", "##### MAC WORKSTATION #####\n", "#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'\n", "###########################\n", "\n", "##### WINDOWS WORKSTATION #####\n", "#base_dir = r'C:\\Users\\LaboLabrie\\gerz2701\\cyCIF-pipeline\\Set_B'\n", "###############################\n", "\n", "##### LOCAL WORKSTATION #####\n", "base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'\n", "#############################\n", "\n", "#set_name = 'Set_A'\n", "set_name = 'test'" ] }, { "cell_type": "raw", "id": "1f68584f-c1f6-49cd-99f8-5a7cc8aae26e", "metadata": {}, "source": [ "The project is organized as :\n", "main dir \n", " code\n", " proj_data > all input csv files\n", " proj_metadata > exposure time csv file, images dir,...\n", " proj_qc_eda > csv after the QC/EDA step" ] }, { "cell_type": "code", "execution_count": 11, "id": "e8a4b389-1136-4470-9898-29fd39baf1f5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/ directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_data directory already exists !\n", "The 
/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda/images directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata directory already exists !\n", "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images directory already exists !\n" ] } ], "source": [ "project_name = set_name # Project name\n", "step_suffix = 'qc_eda' # Curent part (here part I)\n", "previous_step_suffix_long = \"\" # Previous part (here empty)\n", "\n", "# Initial input data directory\n", "input_data_dir = os.path.join(base_dir, project_name + \"_data\")\n", "\n", "# QC/EDA output directories\n", "# global output\n", "output_data_dir = os.path.join(base_dir, project_name + \"_\" + step_suffix)\n", "# images subdirectory\n", "output_images_dir = os.path.join(output_data_dir,\"images\")\n", "\n", "# Data and Metadata directories\n", "# global data\n", "metadata_dir = os.path.join(base_dir, project_name + \"_metadata\")\n", "# images subdirectory\n", "metadata_images_dir = os.path.join(metadata_dir,\"images\")\n", "\n", "# Create directories if they don't already exist\n", "for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:\n", " if not os.path.exists(d):\n", " print(\"Creation of the\" , d, \"directory...\")\n", " os.makedirs(d)\n", " else :\n", " print(\"The\", d, \"directory already exists !\")\n", "\n", "os.chdir(input_data_dir)" ] }, { "cell_type": "code", "execution_count": 12, "id": "b6e9df25-588a-41b4-a1f5-45bb81f012ff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "base_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/\n", "input_data_dir : 
/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_data\n", "output_data_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda\n", "output_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda/images\n", "metadata_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata\n", "metadata_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images\n" ] } ], "source": [ "# Verify paths\n", "print('base_dir :', base_dir)\n", "print('input_data_dir :', input_data_dir)\n", "print('output_data_dir :', output_data_dir)\n", "print('output_images_dir :', output_images_dir)\n", "print('metadata_dir :', metadata_dir)\n", "print('metadata_images_dir :', metadata_images_dir)" ] }, { "cell_type": "markdown", "id": "44ebdb24-d428-4948-8d9d-485e4591212b", "metadata": {}, "source": [ "## I.3. FILES" ] }, { "cell_type": "raw", "id": "b81abd9c-2501-4b31-8c71-aa191c518b31", "metadata": {}, "source": [ "Don't forget to put your data in the projname_data directory !" 
] }, { "cell_type": "code", "execution_count": 13, "id": "b2569b34-ef84-4af6-836d-befe3bdda706", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The following CSV files were detected:\n", "\n", " ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv'] \n", "\n", "in /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_data directory.\n" ] } ], "source": [ "# Listing all the .csv files in the metadata/data directory\n", "# Don't forget to move the csv files into the proj_data directory\n", "# if the data dir is empty it's not going to work \n", "ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(\".csv\")]\n", "\n", "print(\"The following CSV files were detected:\\n\\n\",[sample for sample in ls_samples], \"\\n\\nin\", input_data_dir, \"directory.\")\n", "\n", "#print(ls_samples[0])" ] }, { "cell_type": "code", "execution_count": 14, "id": "591d76f3-8b5b-4dfc-a71a-0d3ec6eeb93e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df :\n", " Cell Size Nuc X Nuc Y Inv ROI index Nucleus Size \\\n", "Cell Id \n", "1@1 339 1484.771729 16632.205078 0 127 \n", "\n", " Nucleus Roundness AF488 Cell Intensity Average \\\n", "Cell Id \n", "1@1 0.95504 2385.867188 \n", "\n", " AF488 Cytoplasm Intensity Average AF488 Nucleus Intensity Average \\\n", "Cell Id \n", "1@1 2356.6604 2434.62207 \n", "\n", " AF555 Cell Intensity Average ... r7c2 Nucleus Intensity Average \\\n", "Cell Id ... \n", "1@1 1358.528076 ... 
290.582672 \n", "\n", " r8c2 Cell Intensity Average r8c2 Cytoplasm Intensity Average \\\n", "Cell Id \n", "1@1 341.790558 337.82547 \n", "\n", " r8c2 Nucleus Intensity Average Sting Cell Intensity Average \\\n", "Cell Id \n", "1@1 348.409454 1567.100342 \n", "\n", " Sting Cytoplasm Intensity Average Sting Nucleus Intensity Average \\\n", "Cell Id \n", "1@1 1533.22168 1623.653564 \n", "\n", " Vimentin Cell Intensity Average \\\n", "Cell Id \n", "1@1 7279.144531 \n", "\n", " Vimentin Cytoplasm Intensity Average \\\n", "Cell Id \n", "1@1 7040.108398 \n", "\n", " Vimentin Nucleus Intensity Average \n", "Cell Id \n", "1@1 7678.165527 \n", "\n", "[1 rows x 141 columns] \n", "\n", "df's columns :\n", " Index(['Cell Size', 'Nuc X', 'Nuc Y Inv', 'ROI index', 'Nucleus Size',\n", " 'Nucleus Roundness', 'AF488 Cell Intensity Average',\n", " 'AF488 Cytoplasm Intensity Average', 'AF488 Nucleus Intensity Average',\n", " 'AF555 Cell Intensity Average',\n", " ...\n", " 'r7c2 Nucleus Intensity Average', 'r8c2 Cell Intensity Average',\n", " 'r8c2 Cytoplasm Intensity Average', 'r8c2 Nucleus Intensity Average',\n", " 'Sting Cell Intensity Average', 'Sting Cytoplasm Intensity Average',\n", " 'Sting Nucleus Intensity Average', 'Vimentin Cell Intensity Average',\n", " 'Vimentin Cytoplasm Intensity Average',\n", " 'Vimentin Nucleus Intensity Average'],\n", " dtype='object', length=141) \n", "\n", "df's index :\n", " Index(['1@1'], dtype='object', name='Cell Id') \n", "\n", "df's index name :\n", " Cell Id\n" ] } ], "source": [ "# First gather information on expected headers using first file in ls_samples\n", "# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples\n", "df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)\n", "\n", "\n", "# Make sure the file was imported correctly\n", "print(\"df :\\n\", df.head(), \"\\n\")\n", "print(\"df's columns :\\n\", df.columns, \"\\n\")\n", "print(\"df's index :\\n\", 
df.index, \"\\n\")\n", "print(\"df's index name :\\n\", df.index.name)" ] }, { "cell_type": "code", "execution_count": 15, "id": "7caa2335-ea22-4973-b50c-7087349c2dc6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Expected the first column in input file (index_col = 0) to be 'ID'. \n", "This column will be used to set the index names (cell number for each sample). \n", "It appears that the column 'Cell Id' was actually the imported as the index column.\n", "A new index name (first column) will be given ('ID') to replace the current one 'Cell Id'\n", "\n", "['Cell_Size' 'Nuc_X' 'Nuc_Y_Inv' 'ROI_index' 'Nucleus_Size'\n", " 'Nucleus_Roundness' 'AF488_Cell_Intensity_Average'\n", " 'AF488_Cytoplasm_Intensity_Average' 'AF488_Nucleus_Intensity_Average'\n", " 'AF555_Cell_Intensity_Average' 'AF555_Cytoplasm_Intensity_Average'\n", " 'AF555_Nucleus_Intensity_Average' 'AF647_Cell_Intensity_Average'\n", " 'AF647_Cytoplasm_Intensity_Average' 'AF647_Nucleus_Intensity_Average'\n", " 'AF750_Cell_Intensity_Average' 'AF750_Cytoplasm_Intensity_Average'\n", " 'AF750_Nucleus_Intensity_Average' 'aSMA_Cell_Intensity_Average'\n", " 'aSMA_Cytoplasm_Intensity_Average' 'aSMA_Nucleus_Intensity_Average'\n", " 'AXL_Cell_Intensity_Average' 'AXL_Cytoplasm_Intensity_Average'\n", " 'AXL_Nucleus_Intensity_Average' 'B7H4_Cell_Intensity_Average'\n", " 'B7H4_Cytoplasm_Intensity_Average' 'B7H4_Nucleus_Intensity_Average'\n", " 'CA9_Cell_Intensity_Average' 'CA9_Cytoplasm_Intensity_Average'\n", " 'CA9_Nucleus_Intensity_Average' 'CD4_Cell_Intensity_Average'\n", " 'CD4_Cytoplasm_Intensity_Average' 'CD4_Nucleus_Intensity_Average'\n", " 'CD8_Cell_Intensity_Average' 'CD8_Cytoplasm_Intensity_Average'\n", " 'CD8_Nucleus_Intensity_Average' 'CD11b_Cell_Intensity_Average'\n", " 'CD11b_Cytoplasm_Intensity_Average' 'CD11b_Nucleus_Intensity_Average'\n", " 'CD11c_Cell_Intensity_Average' 'CD11c_Cytoplasm_Intensity_Average'\n", " 'CD11c_Nucleus_Intensity_Average' 
'CD20_Cell_Intensity_Average'\n", " 'CD20_Cytoplasm_Intensity_Average' 'CD20_Nucleus_Intensity_Average'\n", " 'CD31_Cell_Intensity_Average' 'CD31_Cytoplasm_Intensity_Average'\n", " 'CD31_Nucleus_Intensity_Average' 'CD44_Cell_Intensity_Average'\n", " 'CD44_Cytoplasm_Intensity_Average' 'CD44_Nucleus_Intensity_Average'\n", " 'CD45_Cell_Intensity_Average' 'CD45_Cytoplasm_Intensity_Average'\n", " 'CD45_Nucleus_Intensity_Average' 'CD68_Cell_Intensity_Average'\n", " 'CD68_Cytoplasm_Intensity_Average' 'CD68_Nucleus_Intensity_Average'\n", " 'CD163_Cell_Intensity_Average' 'CD163_Cytoplasm_Intensity_Average'\n", " 'CD163_Nucleus_Intensity_Average' 'CKs_Cell_Intensity_Average'\n", " 'CKs_Cytoplasm_Intensity_Average' 'CKs_Nucleus_Intensity_Average'\n", " 'ColVI_Cell_Intensity_Average' 'ColVI_Cytoplasm_Intensity_Average'\n", " 'ColVI_Nucleus_Intensity_Average' 'DAPI0_Cell_Intensity_Average'\n", " 'DAPI0_Cytoplasm_Intensity_Average' 'DAPI0_Nucleus_Intensity_Average'\n", " 'DAPI1_Cell_Intensity_Average' 'DAPI1_Cytoplasm_Intensity_Average'\n", " 'DAPI1_Nucleus_Intensity_Average' 'DAPI2_Cell_Intensity_Average'\n", " 'DAPI2_Cytoplasm_Intensity_Average' 'DAPI2_Nucleus_Intensity_Average'\n", " 'DAPI3_Cell_Intensity_Average' 'DAPI3_Cytoplasm_Intensity_Average'\n", " 'DAPI3_Nucleus_Intensity_Average' 'DAPI4_Cell_Intensity_Average'\n", " 'DAPI4_Cytoplasm_Intensity_Average' 'DAPI4_Nucleus_Intensity_Average'\n", " 'DAPI5_Cell_Intensity_Average' 'DAPI5_Cytoplasm_Intensity_Average'\n", " 'DAPI5_Nucleus_Intensity_Average' 'DAPI6_Cell_Intensity_Average'\n", " 'DAPI6_Cytoplasm_Intensity_Average' 'DAPI6_Nucleus_Intensity_Average'\n", " 'DAPI7_Cell_Intensity_Average' 'DAPI7_Cytoplasm_Intensity_Average'\n", " 'DAPI7_Nucleus_Intensity_Average' 'DAPI8_Cell_Intensity_Average'\n", " 'DAPI8_Cytoplasm_Intensity_Average' 'DAPI8_Nucleus_Intensity_Average'\n", " 'Desmin_Cell_Intensity_Average' 'Desmin_Cytoplasm_Intensity_Average'\n", " 'Desmin_Nucleus_Intensity_Average' 'Ecad_Cell_Intensity_Average'\n", " 
'Ecad_Cytoplasm_Intensity_Average' 'Ecad_Nucleus_Intensity_Average'\n", " 'Fibronectin_Cell_Intensity_Average'\n", " 'Fibronectin_Cytoplasm_Intensity_Average'\n", " 'Fibronectin_Nucleus_Intensity_Average' 'FOXP3_Cell_Intensity_Average'\n", " 'FOXP3_Cytoplasm_Intensity_Average' 'FOXP3_Nucleus_Intensity_Average'\n", " 'GATA3_Cell_Intensity_Average' 'GATA3_Cytoplasm_Intensity_Average'\n", " 'GATA3_Nucleus_Intensity_Average' 'HLA_Cell_Intensity_Average'\n", " 'HLA_Cytoplasm_Intensity_Average' 'HLA_Nucleus_Intensity_Average'\n", " 'Ki67_Cell_Intensity_Average' 'Ki67_Cytoplasm_Intensity_Average'\n", " 'Ki67_Nucleus_Intensity_Average' 'MMP9_Cell_Intensity_Average'\n", " 'MMP9_Cytoplasm_Intensity_Average' 'MMP9_Nucleus_Intensity_Average'\n", " 'PD1_Cell_Intensity_Average' 'PD1_Cytoplasm_Intensity_Average'\n", " 'PD1_Nucleus_Intensity_Average' 'PDGFR_Cell_Intensity_Average'\n", " 'PDGFR_Cytoplasm_Intensity_Average' 'PDGFR_Nucleus_Intensity_Average'\n", " 'PDL1_Cell_Intensity_Average' 'PDL1_Cytoplasm_Intensity_Average'\n", " 'PDL1_Nucleus_Intensity_Average' 'r5c2_Cell_Intensity_Average'\n", " 'r5c2_Cytoplasm_Intensity_Average' 'r5c2_Nucleus_Intensity_Average'\n", " 'r7c2_Cell_Intensity_Average' 'r7c2_Cytoplasm_Intensity_Average'\n", " 'r7c2_Nucleus_Intensity_Average' 'r8c2_Cell_Intensity_Average'\n", " 'r8c2_Cytoplasm_Intensity_Average' 'r8c2_Nucleus_Intensity_Average'\n", " 'Sting_Cell_Intensity_Average' 'Sting_Cytoplasm_Intensity_Average'\n", " 'Sting_Nucleus_Intensity_Average' 'Vimentin_Cell_Intensity_Average'\n", " 'Vimentin_Cytoplasm_Intensity_Average'\n", " 'Vimentin_Nucleus_Intensity_Average']\n", "\n", "df :\n", " Cell_Size Nuc_X Nuc_Y_Inv ROI_index Nucleus_Size \\\n", "ID \n", "1@1 339 1484.771729 16632.205078 0 127 \n", "\n", " Nucleus_Roundness AF488_Cell_Intensity_Average \\\n", "ID \n", "1@1 0.95504 2385.867188 \n", "\n", " AF488_Cytoplasm_Intensity_Average AF488_Nucleus_Intensity_Average \\\n", "ID \n", "1@1 2356.6604 2434.62207 \n", "\n", " 
AF555_Cell_Intensity_Average ... r7c2_Nucleus_Intensity_Average \\\n", "ID ... \n", "1@1 1358.528076 ... 290.582672 \n", "\n", " r8c2_Cell_Intensity_Average r8c2_Cytoplasm_Intensity_Average \\\n", "ID \n", "1@1 341.790558 337.82547 \n", "\n", " r8c2_Nucleus_Intensity_Average Sting_Cell_Intensity_Average \\\n", "ID \n", "1@1 348.409454 1567.100342 \n", "\n", " Sting_Cytoplasm_Intensity_Average Sting_Nucleus_Intensity_Average \\\n", "ID \n", "1@1 1533.22168 1623.653564 \n", "\n", " Vimentin_Cell_Intensity_Average Vimentin_Cytoplasm_Intensity_Average \\\n", "ID \n", "1@1 7279.144531 7040.108398 \n", "\n", " Vimentin_Nucleus_Intensity_Average \n", "ID \n", "1@1 7678.165527 \n", "\n", "[1 rows x 141 columns] \n", "\n", "df's columns :\n", " Index(['Cell_Size', 'Nuc_X', 'Nuc_Y_Inv', 'ROI_index', 'Nucleus_Size',\n", " 'Nucleus_Roundness', 'AF488_Cell_Intensity_Average',\n", " 'AF488_Cytoplasm_Intensity_Average', 'AF488_Nucleus_Intensity_Average',\n", " 'AF555_Cell_Intensity_Average',\n", " ...\n", " 'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell_Intensity_Average',\n", " 'r8c2_Cytoplasm_Intensity_Average', 'r8c2_Nucleus_Intensity_Average',\n", " 'Sting_Cell_Intensity_Average', 'Sting_Cytoplasm_Intensity_Average',\n", " 'Sting_Nucleus_Intensity_Average', 'Vimentin_Cell_Intensity_Average',\n", " 'Vimentin_Cytoplasm_Intensity_Average',\n", " 'Vimentin_Nucleus_Intensity_Average'],\n", " dtype='object', length=141) \n", "\n", "df's index :\n", " Index(['1@1'], dtype='object', name='ID') \n", "\n", "df's index name :\n", " ID\n" ] } ], "source": [ "# Verify that the ID column in input file became the index\n", "# Verify that the index name column is \"ID\", if not, rename it\n", "if df.index.name != \"ID\":\n", " print(\"Expected the first column in input file (index_col = 0) to be 'ID'. \\n\"\n", " \"This column will be used to set the index names (cell number for each sample). 
\\n\"\n", " \"It appears that the column '\" + df.index.name + \"' was actually the imported as the index column.\")\n", " #df.index.name = 'ID'\n", " print(\"A new index name (first column) will be given ('ID') to replace the current one '\" + df.index.name + \"'\\n\")\n", "\n", "# Apply the changes to the headers as specified with apply_header_changes() function (in my_modules.py)\n", "# Apply the changes to the dataframe rows as specified with apply_df_changes() function (in my_modules.py)\n", "df = apply_header_changes(df)\n", "df = apply_df_changes(df)\n", "\n", "# Set variable to hold default header values\n", "expected_headers = df.columns.values\n", "print(expected_headers)\n", "\n", "# Make sure the file is now formated correctly\n", "print(\"\\ndf :\\n\", df.head(), \"\\n\")\n", "print(\"df's columns :\\n\", df.columns, \"\\n\")\n", "print(\"df's index :\\n\", df.index, \"\\n\")\n", "print(\"df's index name :\\n\", df.index.name)" ] }, { "cell_type": "code", "execution_count": 16, "id": "1e7448a1-b156-4d5e-9698-e94ebe9ef7b4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Used DD3S1.csv to determine the expected and corrected headers for all files.\n", "\n", "These headers are: \n", "Cell_Size, Nuc_X, Nuc_Y_Inv, ROI_index, Nucleus_Size, Nucleus_Roundness, AF488_Cell_Intensity_Average, AF488_Cytoplasm_Intensity_Average, AF488_Nucleus_Intensity_Average, AF555_Cell_Intensity_Average, AF555_Cytoplasm_Intensity_Average, AF555_Nucleus_Intensity_Average, AF647_Cell_Intensity_Average, AF647_Cytoplasm_Intensity_Average, AF647_Nucleus_Intensity_Average, AF750_Cell_Intensity_Average, AF750_Cytoplasm_Intensity_Average, AF750_Nucleus_Intensity_Average, aSMA_Cell_Intensity_Average, aSMA_Cytoplasm_Intensity_Average, aSMA_Nucleus_Intensity_Average, AXL_Cell_Intensity_Average, AXL_Cytoplasm_Intensity_Average, AXL_Nucleus_Intensity_Average, B7H4_Cell_Intensity_Average, B7H4_Cytoplasm_Intensity_Average, B7H4_Nucleus_Intensity_Average, 
CA9_Cell_Intensity_Average, CA9_Cytoplasm_Intensity_Average, CA9_Nucleus_Intensity_Average, CD4_Cell_Intensity_Average, CD4_Cytoplasm_Intensity_Average, CD4_Nucleus_Intensity_Average, CD8_Cell_Intensity_Average, CD8_Cytoplasm_Intensity_Average, CD8_Nucleus_Intensity_Average, CD11b_Cell_Intensity_Average, CD11b_Cytoplasm_Intensity_Average, CD11b_Nucleus_Intensity_Average, CD11c_Cell_Intensity_Average, CD11c_Cytoplasm_Intensity_Average, CD11c_Nucleus_Intensity_Average, CD20_Cell_Intensity_Average, CD20_Cytoplasm_Intensity_Average, CD20_Nucleus_Intensity_Average, CD31_Cell_Intensity_Average, CD31_Cytoplasm_Intensity_Average, CD31_Nucleus_Intensity_Average, CD44_Cell_Intensity_Average, CD44_Cytoplasm_Intensity_Average, CD44_Nucleus_Intensity_Average, CD45_Cell_Intensity_Average, CD45_Cytoplasm_Intensity_Average, CD45_Nucleus_Intensity_Average, CD68_Cell_Intensity_Average, CD68_Cytoplasm_Intensity_Average, CD68_Nucleus_Intensity_Average, CD163_Cell_Intensity_Average, CD163_Cytoplasm_Intensity_Average, CD163_Nucleus_Intensity_Average, CKs_Cell_Intensity_Average, CKs_Cytoplasm_Intensity_Average, CKs_Nucleus_Intensity_Average, ColVI_Cell_Intensity_Average, ColVI_Cytoplasm_Intensity_Average, ColVI_Nucleus_Intensity_Average, DAPI0_Cell_Intensity_Average, DAPI0_Cytoplasm_Intensity_Average, DAPI0_Nucleus_Intensity_Average, DAPI1_Cell_Intensity_Average, DAPI1_Cytoplasm_Intensity_Average, DAPI1_Nucleus_Intensity_Average, DAPI2_Cell_Intensity_Average, DAPI2_Cytoplasm_Intensity_Average, DAPI2_Nucleus_Intensity_Average, DAPI3_Cell_Intensity_Average, DAPI3_Cytoplasm_Intensity_Average, DAPI3_Nucleus_Intensity_Average, DAPI4_Cell_Intensity_Average, DAPI4_Cytoplasm_Intensity_Average, DAPI4_Nucleus_Intensity_Average, DAPI5_Cell_Intensity_Average, DAPI5_Cytoplasm_Intensity_Average, DAPI5_Nucleus_Intensity_Average, DAPI6_Cell_Intensity_Average, DAPI6_Cytoplasm_Intensity_Average, DAPI6_Nucleus_Intensity_Average, DAPI7_Cell_Intensity_Average, DAPI7_Cytoplasm_Intensity_Average, 
# %% Report the reference file and the header template derived from it
# expected_headers was captured from the first sample after the header/row
# clean-up; every other file is aligned to it in the next cell.
print("Used " + ls_samples[0] + " to determine the expected and corrected headers for all files.\n")
print("These headers are: \n" + ", ".join(expected_headers))

# %% Import every sample file listed in ls_samples
# dfs maps each sample file name to its cleaned, header-aligned DataFrame.
dfs = {}
# Samples that cannot be used (no content at all, or a header without rows).
# They are removed from ls_samples only after the loop has finished:
# removing items from a list while iterating over it makes the loop skip
# the element that follows each removed one.
unusable_samples = []

###############################
# !! This may take a while !! #
###############################
# Iterate over a snapshot so ls_samples itself is never mutated mid-loop.
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file, using its first column as the index
        sample_df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        # Completely empty file: nothing to store for this sample.
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        unusable_samples.append(sample)
        continue

    if sample_df.empty:
        # The file was readable but holds no data rows. It is dropped as well so that
        # ls_samples and the Sample_ID values of the merged frame stay in agreement.
        print(f'\nNo data rows found in {sample} file. Removing from analysis...')
        unusable_samples.append(sample)
        continue

    # Manipulations necessary for concatenation
    # (apply_header_changes / apply_df_changes come from my_modules.py)
    sample_df = apply_header_changes(sample_df)
    sample_df = apply_df_changes(sample_df)
    # Reorder the columns to match the expected headers list.
    # reindex() also creates any missing expected column (filled with NaN)
    # and drops columns that are not expected.
    sample_df = sample_df.reindex(columns=expected_headers)
    print(sample, "file is processed !\n")

    # Compare the sample's headers against what is expected.
    # NOTE(review): because this runs after reindex(), the two header lists are
    # always identical here; a real mismatch only shows up later as NaN columns.
    # Consider comparing before the reindex -- confirm how compare_headers
    # reacts to differing lengths/order first (it is not defined in this notebook).
    compare_headers(expected_headers, sample_df.columns.values, sample)

    # Add a new column to identify the csv file (sample) the rows come from
    sample_df['Sample_ID'] = sample

    # Only successfully processed samples are stored
    dfs[sample] = sample_df

# Drop the unusable samples from the list used by all later cells
for sample in unusable_samples:
    ls_samples.remove(sample)
# %% Merge the per-sample frames into one DataFrame
# The frames are stacked row-wise in the insertion order of dfs. Each frame
# keeps its own index labels (ignore_index=False) and the columns are not
# sorted, so the header-template order is preserved.
sample_frames = dfs.values()
df = pd.concat(objs=sample_frames, axis=0, ignore_index=False, sort=False)
# dfs is intentionally kept alive (the original `del dfs` was disabled).

print(df.head())
# %% Set index to <sample name>_Cell_<n>
# Every row gets a label such as 'DD3S1_Cell_0': the sample file name up to its
# first '.', followed by the row's running number within that sample.

# Replace the per-file IDs (e.g. '1@1') with a plain integer index; the old
# index is discarded, not turned into a column.
df = df.reset_index(drop=True)

# Sample name without extension, one value per row
sample_stems = df['Sample_ID'].astype(str).str.split('.').str[0]
# 0-based position of each row inside its own sample. Computing it per group
# keeps the labels correct even if the rows of a sample are not contiguous or
# the samples do not appear in ls_samples order.
cell_numbers = df.groupby('Sample_ID', sort=False).cumcount().astype(str)
df.index = (sample_stems + '_Cell_' + cell_numbers).tolist()

# Two file names sharing the same stem (e.g. 'a.1.csv' and 'a.2.csv') would
# produce identical labels; fail loudly instead of carrying duplicates forward.
assert df.index.is_unique, "Duplicate cell labels: two sample file names share the same stem."

# Remove the 'level_0' and 'index' columns from df, if present
df = df.loc[:, ~df.columns.isin(['level_0', 'index'])]

print(df.head())

# %% [markdown]
# ### I.3.2. NOT_INTENSITIES

# %%
# not_intensities is the list of the columns unrelated to the markers fluorescence intensities
# Can include items that aren't in a given header.
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# %% Reconcile the notebook list with metadata/not_intensities.csv
# The file holds one column name per line.
path_not_intensities = os.path.join(metadata_dir, "not_intensities.csv")

# If this file already exists, add only the not_intensities items that are not already present in it
if os.path.exists(path_not_intensities):
    print("'not_intensities.csv' already exists.")
    print("Reconciling file and Jupyter notebook lists.")
    # `with` guarantees the handle is closed even if reading fails
    with open(path_not_intensities, "r") as file_not_intensities:
        # Skip blank lines so they do not end up as '' column names
        file_ni = [line for line in file_not_intensities.read().splitlines() if line]

    # Items of the notebook list that the file does not know yet (notebook order, no duplicates)
    known_in_file = set(file_ni)
    to_add = [item for item in dict.fromkeys(not_intensities) if item not in known_in_file]

    # We want not_intensities to be the complete list: notebook items first, then
    # file-only items. dict.fromkeys() de-duplicates while keeping this order, so
    # the list (and the column order derived from it) is the same on every run.
    not_intensities = list(dict.fromkeys(not_intensities + file_ni))

    with open(path_not_intensities, "a") as file_not_intensities:
        for item in to_add:
            file_not_intensities.write(item + "\n")

else:
    # The file does not yet exist
    print("Could not find " + path_not_intensities + ". Creating now.")
    with open(path_not_intensities, "w") as file_not_intensities:
        for item in not_intensities:
            file_not_intensities.write(item + "\n")
'CD11b_Cytoplasm_Intensity_Average',\n", " 'CD11b_Nucleus_Intensity_Average',\n", " 'CD11c_Cell_Intensity_Average',\n", " 'CD11c_Cytoplasm_Intensity_Average',\n", " 'CD11c_Nucleus_Intensity_Average',\n", " 'CD20_Cell_Intensity_Average',\n", " 'CD20_Cytoplasm_Intensity_Average',\n", " 'CD20_Nucleus_Intensity_Average',\n", " 'CD31_Cell_Intensity_Average',\n", " 'CD31_Cytoplasm_Intensity_Average',\n", " 'CD31_Nucleus_Intensity_Average',\n", " 'CD44_Cell_Intensity_Average',\n", " 'CD44_Cytoplasm_Intensity_Average',\n", " 'CD44_Nucleus_Intensity_Average',\n", " 'CD45_Cell_Intensity_Average',\n", " 'CD45_Cytoplasm_Intensity_Average',\n", " 'CD45_Nucleus_Intensity_Average',\n", " 'CD68_Cell_Intensity_Average',\n", " 'CD68_Cytoplasm_Intensity_Average',\n", " 'CD68_Nucleus_Intensity_Average',\n", " 'CD163_Cell_Intensity_Average',\n", " 'CD163_Cytoplasm_Intensity_Average',\n", " 'CD163_Nucleus_Intensity_Average',\n", " 'CKs_Cell_Intensity_Average',\n", " 'CKs_Cytoplasm_Intensity_Average',\n", " 'CKs_Nucleus_Intensity_Average',\n", " 'ColVI_Cell_Intensity_Average',\n", " 'ColVI_Cytoplasm_Intensity_Average',\n", " 'ColVI_Nucleus_Intensity_Average',\n", " 'DAPI0_Cell_Intensity_Average',\n", " 'DAPI0_Cytoplasm_Intensity_Average',\n", " 'DAPI0_Nucleus_Intensity_Average',\n", " 'DAPI1_Cell_Intensity_Average',\n", " 'DAPI1_Cytoplasm_Intensity_Average',\n", " 'DAPI1_Nucleus_Intensity_Average',\n", " 'DAPI2_Cell_Intensity_Average',\n", " 'DAPI2_Cytoplasm_Intensity_Average',\n", " 'DAPI2_Nucleus_Intensity_Average',\n", " 'DAPI3_Cell_Intensity_Average',\n", " 'DAPI3_Cytoplasm_Intensity_Average',\n", " 'DAPI3_Nucleus_Intensity_Average',\n", " 'DAPI4_Cell_Intensity_Average',\n", " 'DAPI4_Cytoplasm_Intensity_Average',\n", " 'DAPI4_Nucleus_Intensity_Average',\n", " 'DAPI5_Cell_Intensity_Average',\n", " 'DAPI5_Cytoplasm_Intensity_Average',\n", " 'DAPI5_Nucleus_Intensity_Average',\n", " 'DAPI6_Cell_Intensity_Average',\n", " 'DAPI6_Cytoplasm_Intensity_Average',\n", " 
'DAPI6_Nucleus_Intensity_Average',\n", " 'DAPI7_Cell_Intensity_Average',\n", " 'DAPI7_Cytoplasm_Intensity_Average',\n", " 'DAPI7_Nucleus_Intensity_Average',\n", " 'DAPI8_Cell_Intensity_Average',\n", " 'DAPI8_Cytoplasm_Intensity_Average',\n", " 'DAPI8_Nucleus_Intensity_Average',\n", " 'Desmin_Cell_Intensity_Average',\n", " 'Desmin_Cytoplasm_Intensity_Average',\n", " 'Desmin_Nucleus_Intensity_Average',\n", " 'Ecad_Cell_Intensity_Average',\n", " 'Ecad_Cytoplasm_Intensity_Average',\n", " 'Ecad_Nucleus_Intensity_Average',\n", " 'Fibronectin_Cell_Intensity_Average',\n", " 'Fibronectin_Cytoplasm_Intensity_Average',\n", " 'Fibronectin_Nucleus_Intensity_Average',\n", " 'FOXP3_Cell_Intensity_Average',\n", " 'FOXP3_Cytoplasm_Intensity_Average',\n", " 'FOXP3_Nucleus_Intensity_Average',\n", " 'GATA3_Cell_Intensity_Average',\n", " 'GATA3_Cytoplasm_Intensity_Average',\n", " 'GATA3_Nucleus_Intensity_Average',\n", " 'HLA_Cell_Intensity_Average',\n", " 'HLA_Cytoplasm_Intensity_Average',\n", " 'HLA_Nucleus_Intensity_Average',\n", " 'Ki67_Cell_Intensity_Average',\n", " 'Ki67_Cytoplasm_Intensity_Average',\n", " 'Ki67_Nucleus_Intensity_Average',\n", " 'MMP9_Cell_Intensity_Average',\n", " 'MMP9_Cytoplasm_Intensity_Average',\n", " 'MMP9_Nucleus_Intensity_Average',\n", " 'PD1_Cell_Intensity_Average',\n", " 'PD1_Cytoplasm_Intensity_Average',\n", " 'PD1_Nucleus_Intensity_Average',\n", " 'PDGFR_Cell_Intensity_Average',\n", " 'PDGFR_Cytoplasm_Intensity_Average',\n", " 'PDGFR_Nucleus_Intensity_Average',\n", " 'PDL1_Cell_Intensity_Average',\n", " 'PDL1_Cytoplasm_Intensity_Average',\n", " 'PDL1_Nucleus_Intensity_Average',\n", " 'r5c2_Cell_Intensity_Average',\n", " 'r5c2_Cytoplasm_Intensity_Average',\n", " 'r5c2_Nucleus_Intensity_Average',\n", " 'r7c2_Cell_Intensity_Average',\n", " 'r7c2_Cytoplasm_Intensity_Average',\n", " 'r7c2_Nucleus_Intensity_Average',\n", " 'r8c2_Cell_Intensity_Average',\n", " 'r8c2_Cytoplasm_Intensity_Average',\n", " 'r8c2_Nucleus_Intensity_Average',\n", " 
# %% Columns we want to keep
# not_intensities, plus every other column whose name contains
# 'Intensity_Average' (any intensity column that is not a mean intensity is dropped).
intensity_average_columns = [
    column
    for column in df.columns.values
    if column not in not_intensities and 'Intensity_Average' in column
]
to_keep = not_intensities + intensity_average_columns

to_keep

# %% Restrict df to the columns of to_keep that it really has
# to_keep may name columns (coming from not_intensities) that are absent from
# this data set; only the ones present in df's header are selected, in to_keep order.
available_columns = set(df.columns.values)
df = df.loc[:, [column for column in to_keep if column in available_columns]]

print(df.head())

# %% [markdown]
# ## I.4. QC CHECKS

# %%
# Let's take a look at a few features to make sure our dataframe is as expected
df.index

# %%
df.shape

# %%
# Check for NaN entries (should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isna().any().any()

# %%
# Check that all expected files were imported into the final dataframe
observed_samples = sorted(df['Sample_ID'].unique())
expected_samples = sorted(ls_samples)
if observed_samples == expected_samples:
    print("All expected filenames are present in big df Sample_ID column.")
else:
    # NOTE(review): the expected list passed here is the literal ['no samples'],
    # not ls_samples -- confirm this is intended.
    compare_headers(['no samples'], df['Sample_ID'].unique(), "big df Sample_ID column")

print(df['Sample_ID'])
# %% Delete rows that have 0 value mean intensities for intensity columns
print("df.shape before removing 0 mean values: ", df.shape)

###############################
# !! This may take a while !! #
###############################
# Row-wise mean over every column that is NOT listed in not_intensities
# (vectorised; no per-row apply/lambda is involved).
mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)

# Check if there are any 0 mean intensity values
if (mean_intensity == 0).any():
    # NOTE(review): the filter keeps rows with a strictly positive mean, so when
    # it runs it also drops rows whose mean is NaN or negative -- confirm intended.
    df = df.loc[mean_intensity > 0, :]
    print("df.shape after removing 0 mean values: ", df.shape)
else:
    print("No zero intensity values found in the DataFrame.")

# %%
# Quantiles to extract (5th, 50th, 95th)
qs = [0.05, 0.50, 0.95]

# %%
df["Nucleus_Size"].quantile(q=qs)


# %% Helper shared by the three distribution plots below
def plot_column_distribution(data, column, output_dir, bins=30, alpha=0.7, with_mean_median=False):
    """Plot a histogram of one column with its 5th/95th quantiles and save it as PNG.

    Parameters
    ----------
    data : DataFrame holding `column`.
    column : name of the column to plot; also used in the title, the x label
        and the output file name (`<column>_Distribution.png`).
    output_dir : directory the PNG is written to.
    bins, alpha : forwarded to `ax.hist`.
    with_mean_median : when True, solid vertical lines and text labels for the
        mean and the median are added and the title mentions them.

    Returns
    -------
    str : path of the saved image.

    Relies on the notebook-level `plt` (it must provide `subplots` and `show`)
    and on `os`.
    """
    values = data[column]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(values, bins=bins, alpha=alpha, color='skyblue')

    if with_mean_median:
        # Solid vertical lines for mean and median (drawn first so they come
        # first in the legend)
        mean_value = values.mean()
        median_value = values.median()
        ax.axvline(x=mean_value, color='orange', linestyle='-', label='Mean')
        ax.axvline(x=median_value, color='purple', linestyle='-', label='Median')

    # Dashed vertical lines at the 0.05 and 0.95 quantiles
    quantile_05 = values.quantile(0.05)
    quantile_95 = values.quantile(0.95)
    ax.axvline(x=quantile_05, color='r', linestyle='--', label='Quantile 0.05')
    ax.axvline(x=quantile_95, color='g', linestyle='--', label='Quantile 0.95')

    # Titles and labels (wording kept identical to the original figures)
    if with_mean_median:
        ax.set_title(f'Distribution of {column} with Quantiles, Mean, and Median')
    else:
        ax.set_title(f'Distribution of {column} column values with horizontal bars at 0.05 and 0.95 quantiles')
    ax.set_xlabel(f'{column} Values')
    ax.set_ylabel('Frequency')
    ax.legend()

    # Quantile values are written at the top of the axes, next to their lines
    y_top = ax.get_ylim()[1]
    ax.text(quantile_05, y_top, f' 5th Quantile: {quantile_05:.2f}', color='r', verticalalignment='top')
    ax.text(quantile_95, y_top, f' 95th Quantile: {quantile_95:.2f}', color='g', verticalalignment='top')

    if with_mean_median:
        # Slightly lower so the labels do not overlap the quantile labels
        ax.text(mean_value, y_top * 0.9, f' Mean: {mean_value:.2f}', color='orange', verticalalignment='top')
        ax.text(median_value, y_top * 0.85, f' Median: {median_value:.2f}', color='purple', verticalalignment='top')

    # Save BEFORE showing: with some backends the figure is released once
    # plt.show() returns, and a later savefig would write an empty image.
    plot_file_path = os.path.join(output_dir, f"{column}_Distribution.png")
    fig.savefig(plot_file_path)

    # Display the plot
    plt.show()
    print(f"The plot is saved at: {plot_file_path}")
    return plot_file_path


# %% Distribution of Nucleus_Size
nucleus_size_plot_path = plot_column_distribution(df, 'Nucleus_Size', output_images_dir)

# %%
print(df["AF555_Cell_Intensity_Average"].quantile(q=qs))

# %% Distribution of AF555_Cell_Intensity_Average
af555_plot_path = plot_column_distribution(df, 'AF555_Cell_Intensity_Average', output_images_dir)

# %% Distribution of CKs_Cytoplasm_Intensity_Average (finer bins, with mean and median)
cks_plot_path = plot_column_distribution(
    df,
    'CKs_Cytoplasm_Intensity_Average',
    output_images_dir,
    bins=100,
    alpha=0.6,
    with_mean_median=True,
)

# %%
df["CKs_Cytoplasm_Intensity_Average"].quantile(q=qs)

# %% [markdown]
# ## I.5. COLUMNS OF INTERESTS
'CA9_Nucleus_Intensity_Average', 'CD4_Cell_Intensity_Average', 'CD4_Cytoplasm_Intensity_Average', 'CD4_Nucleus_Intensity_Average', 'CD8_Cell_Intensity_Average', 'CD8_Cytoplasm_Intensity_Average', 'CD8_Nucleus_Intensity_Average', 'CD11b_Cell_Intensity_Average', 'CD11b_Cytoplasm_Intensity_Average', 'CD11b_Nucleus_Intensity_Average', 'CD11c_Cell_Intensity_Average', 'CD11c_Cytoplasm_Intensity_Average', 'CD11c_Nucleus_Intensity_Average', 'CD20_Cell_Intensity_Average', 'CD20_Cytoplasm_Intensity_Average', 'CD20_Nucleus_Intensity_Average', 'CD31_Cell_Intensity_Average', 'CD31_Cytoplasm_Intensity_Average', 'CD31_Nucleus_Intensity_Average', 'CD44_Cell_Intensity_Average', 'CD44_Cytoplasm_Intensity_Average', 'CD44_Nucleus_Intensity_Average', 'CD45_Cell_Intensity_Average', 'CD45_Cytoplasm_Intensity_Average', 'CD45_Nucleus_Intensity_Average', 'CD68_Cell_Intensity_Average', 'CD68_Cytoplasm_Intensity_Average', 'CD68_Nucleus_Intensity_Average', 'CD163_Cell_Intensity_Average', 'CD163_Cytoplasm_Intensity_Average', 'CD163_Nucleus_Intensity_Average', 'CKs_Cell_Intensity_Average', 'CKs_Cytoplasm_Intensity_Average', 'CKs_Nucleus_Intensity_Average', 'ColVI_Cell_Intensity_Average', 'ColVI_Cytoplasm_Intensity_Average', 'ColVI_Nucleus_Intensity_Average', 'Desmin_Cell_Intensity_Average', 'Desmin_Cytoplasm_Intensity_Average', 'Desmin_Nucleus_Intensity_Average', 'Ecad_Cell_Intensity_Average', 'Ecad_Cytoplasm_Intensity_Average', 'Ecad_Nucleus_Intensity_Average', 'Fibronectin_Cell_Intensity_Average', 'Fibronectin_Cytoplasm_Intensity_Average', 'Fibronectin_Nucleus_Intensity_Average', 'FOXP3_Cell_Intensity_Average', 'FOXP3_Cytoplasm_Intensity_Average', 'FOXP3_Nucleus_Intensity_Average', 'GATA3_Cell_Intensity_Average', 'GATA3_Cytoplasm_Intensity_Average', 'GATA3_Nucleus_Intensity_Average', 'HLA_Cell_Intensity_Average', 'HLA_Cytoplasm_Intensity_Average', 'HLA_Nucleus_Intensity_Average', 'Ki67_Cell_Intensity_Average', 'Ki67_Cytoplasm_Intensity_Average', 'Ki67_Nucleus_Intensity_Average', 
'MMP9_Cell_Intensity_Average', 'MMP9_Cytoplasm_Intensity_Average', 'MMP9_Nucleus_Intensity_Average', 'PD1_Cell_Intensity_Average', 'PD1_Cytoplasm_Intensity_Average', 'PD1_Nucleus_Intensity_Average', 'PDGFR_Cell_Intensity_Average', 'PDGFR_Cytoplasm_Intensity_Average', 'PDGFR_Nucleus_Intensity_Average', 'PDL1_Cell_Intensity_Average', 'PDL1_Cytoplasm_Intensity_Average', 'PDL1_Nucleus_Intensity_Average', 'r5c2_Cell_Intensity_Average', 'r5c2_Cytoplasm_Intensity_Average', 'r5c2_Nucleus_Intensity_Average', 'r7c2_Cell_Intensity_Average', 'r7c2_Cytoplasm_Intensity_Average', 'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell_Intensity_Average', 'r8c2_Cytoplasm_Intensity_Average', 'r8c2_Nucleus_Intensity_Average', 'Sting_Cell_Intensity_Average', 'Sting_Cytoplasm_Intensity_Average', 'Sting_Nucleus_Intensity_Average', 'Vimentin_Cell_Intensity_Average', 'Vimentin_Cytoplasm_Intensity_Average', 'Vimentin_Nucleus_Intensity_Average']\n" ] } ], "source": [ "# Remove columns containing \"DAPI\"\n", "df = df[[x for x in df.columns.values if 'DAPI' not in x]]\n", "\n", "print(\"Columns are now...\")\n", "print([c for c in df.columns.values])" ] }, { "cell_type": "code", "execution_count": 38, "id": "17741329-f501-4eec-af23-ed5026b13a94", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AF488_Cell': 'AF488_Cell_Intensity_Average',\n", " 'AF488_Cytoplasm': 'AF488_Cytoplasm_Intensity_Average',\n", " 'AF488_Nucleus': 'AF488_Nucleus_Intensity_Average',\n", " 'AF555_Cell': 'AF555_Cell_Intensity_Average',\n", " 'AF555_Cytoplasm': 'AF555_Cytoplasm_Intensity_Average',\n", " 'AF555_Nucleus': 'AF555_Nucleus_Intensity_Average',\n", " 'AF647_Cell': 'AF647_Cell_Intensity_Average',\n", " 'AF647_Cytoplasm': 'AF647_Cytoplasm_Intensity_Average',\n", " 'AF647_Nucleus': 'AF647_Nucleus_Intensity_Average',\n", " 'AF750_Cell': 'AF750_Cell_Intensity_Average',\n", " 'AF750_Cytoplasm': 'AF750_Cytoplasm_Intensity_Average',\n", " 'AF750_Nucleus': 'AF750_Nucleus_Intensity_Average',\n", " 'aSMA_Cell': 
'aSMA_Cell_Intensity_Average',\n", " 'aSMA_Cytoplasm': 'aSMA_Cytoplasm_Intensity_Average',\n", " 'aSMA_Nucleus': 'aSMA_Nucleus_Intensity_Average',\n", " 'AXL_Cell': 'AXL_Cell_Intensity_Average',\n", " 'AXL_Cytoplasm': 'AXL_Cytoplasm_Intensity_Average',\n", " 'AXL_Nucleus': 'AXL_Nucleus_Intensity_Average',\n", " 'B7H4_Cell': 'B7H4_Cell_Intensity_Average',\n", " 'B7H4_Cytoplasm': 'B7H4_Cytoplasm_Intensity_Average',\n", " 'B7H4_Nucleus': 'B7H4_Nucleus_Intensity_Average',\n", " 'CA9_Cell': 'CA9_Cell_Intensity_Average',\n", " 'CA9_Cytoplasm': 'CA9_Cytoplasm_Intensity_Average',\n", " 'CA9_Nucleus': 'CA9_Nucleus_Intensity_Average',\n", " 'CD4_Cell': 'CD4_Cell_Intensity_Average',\n", " 'CD4_Cytoplasm': 'CD4_Cytoplasm_Intensity_Average',\n", " 'CD4_Nucleus': 'CD4_Nucleus_Intensity_Average',\n", " 'CD8_Cell': 'CD8_Cell_Intensity_Average',\n", " 'CD8_Cytoplasm': 'CD8_Cytoplasm_Intensity_Average',\n", " 'CD8_Nucleus': 'CD8_Nucleus_Intensity_Average',\n", " 'CD11b_Cell': 'CD11b_Cell_Intensity_Average',\n", " 'CD11b_Cytoplasm': 'CD11b_Cytoplasm_Intensity_Average',\n", " 'CD11b_Nucleus': 'CD11b_Nucleus_Intensity_Average',\n", " 'CD11c_Cell': 'CD11c_Cell_Intensity_Average',\n", " 'CD11c_Cytoplasm': 'CD11c_Cytoplasm_Intensity_Average',\n", " 'CD11c_Nucleus': 'CD11c_Nucleus_Intensity_Average',\n", " 'CD20_Cell': 'CD20_Cell_Intensity_Average',\n", " 'CD20_Cytoplasm': 'CD20_Cytoplasm_Intensity_Average',\n", " 'CD20_Nucleus': 'CD20_Nucleus_Intensity_Average',\n", " 'CD31_Cell': 'CD31_Cell_Intensity_Average',\n", " 'CD31_Cytoplasm': 'CD31_Cytoplasm_Intensity_Average',\n", " 'CD31_Nucleus': 'CD31_Nucleus_Intensity_Average',\n", " 'CD44_Cell': 'CD44_Cell_Intensity_Average',\n", " 'CD44_Cytoplasm': 'CD44_Cytoplasm_Intensity_Average',\n", " 'CD44_Nucleus': 'CD44_Nucleus_Intensity_Average',\n", " 'CD45_Cell': 'CD45_Cell_Intensity_Average',\n", " 'CD45_Cytoplasm': 'CD45_Cytoplasm_Intensity_Average',\n", " 'CD45_Nucleus': 'CD45_Nucleus_Intensity_Average',\n", " 'CD68_Cell': 
'CD68_Cell_Intensity_Average',\n", " 'CD68_Cytoplasm': 'CD68_Cytoplasm_Intensity_Average',\n", " 'CD68_Nucleus': 'CD68_Nucleus_Intensity_Average',\n", " 'CD163_Cell': 'CD163_Cell_Intensity_Average',\n", " 'CD163_Cytoplasm': 'CD163_Cytoplasm_Intensity_Average',\n", " 'CD163_Nucleus': 'CD163_Nucleus_Intensity_Average',\n", " 'CKs_Cell': 'CKs_Cell_Intensity_Average',\n", " 'CKs_Cytoplasm': 'CKs_Cytoplasm_Intensity_Average',\n", " 'CKs_Nucleus': 'CKs_Nucleus_Intensity_Average',\n", " 'ColVI_Cell': 'ColVI_Cell_Intensity_Average',\n", " 'ColVI_Cytoplasm': 'ColVI_Cytoplasm_Intensity_Average',\n", " 'ColVI_Nucleus': 'ColVI_Nucleus_Intensity_Average',\n", " 'Desmin_Cell': 'Desmin_Cell_Intensity_Average',\n", " 'Desmin_Cytoplasm': 'Desmin_Cytoplasm_Intensity_Average',\n", " 'Desmin_Nucleus': 'Desmin_Nucleus_Intensity_Average',\n", " 'Ecad_Cell': 'Ecad_Cell_Intensity_Average',\n", " 'Ecad_Cytoplasm': 'Ecad_Cytoplasm_Intensity_Average',\n", " 'Ecad_Nucleus': 'Ecad_Nucleus_Intensity_Average',\n", " 'Fibronectin_Cell': 'Fibronectin_Cell_Intensity_Average',\n", " 'Fibronectin_Cytoplasm': 'Fibronectin_Cytoplasm_Intensity_Average',\n", " 'Fibronectin_Nucleus': 'Fibronectin_Nucleus_Intensity_Average',\n", " 'FOXP3_Cell': 'FOXP3_Cell_Intensity_Average',\n", " 'FOXP3_Cytoplasm': 'FOXP3_Cytoplasm_Intensity_Average',\n", " 'FOXP3_Nucleus': 'FOXP3_Nucleus_Intensity_Average',\n", " 'GATA3_Cell': 'GATA3_Cell_Intensity_Average',\n", " 'GATA3_Cytoplasm': 'GATA3_Cytoplasm_Intensity_Average',\n", " 'GATA3_Nucleus': 'GATA3_Nucleus_Intensity_Average',\n", " 'HLA_Cell': 'HLA_Cell_Intensity_Average',\n", " 'HLA_Cytoplasm': 'HLA_Cytoplasm_Intensity_Average',\n", " 'HLA_Nucleus': 'HLA_Nucleus_Intensity_Average',\n", " 'Ki67_Cell': 'Ki67_Cell_Intensity_Average',\n", " 'Ki67_Cytoplasm': 'Ki67_Cytoplasm_Intensity_Average',\n", " 'Ki67_Nucleus': 'Ki67_Nucleus_Intensity_Average',\n", " 'MMP9_Cell': 'MMP9_Cell_Intensity_Average',\n", " 'MMP9_Cytoplasm': 'MMP9_Cytoplasm_Intensity_Average',\n", " 
'MMP9_Nucleus': 'MMP9_Nucleus_Intensity_Average',\n", " 'PD1_Cell': 'PD1_Cell_Intensity_Average',\n", " 'PD1_Cytoplasm': 'PD1_Cytoplasm_Intensity_Average',\n", " 'PD1_Nucleus': 'PD1_Nucleus_Intensity_Average',\n", " 'PDGFR_Cell': 'PDGFR_Cell_Intensity_Average',\n", " 'PDGFR_Cytoplasm': 'PDGFR_Cytoplasm_Intensity_Average',\n", " 'PDGFR_Nucleus': 'PDGFR_Nucleus_Intensity_Average',\n", " 'PDL1_Cell': 'PDL1_Cell_Intensity_Average',\n", " 'PDL1_Cytoplasm': 'PDL1_Cytoplasm_Intensity_Average',\n", " 'PDL1_Nucleus': 'PDL1_Nucleus_Intensity_Average',\n", " 'r5c2_Cell': 'r5c2_Cell_Intensity_Average',\n", " 'r5c2_Cytoplasm': 'r5c2_Cytoplasm_Intensity_Average',\n", " 'r5c2_Nucleus': 'r5c2_Nucleus_Intensity_Average',\n", " 'r7c2_Cell': 'r7c2_Cell_Intensity_Average',\n", " 'r7c2_Cytoplasm': 'r7c2_Cytoplasm_Intensity_Average',\n", " 'r7c2_Nucleus': 'r7c2_Nucleus_Intensity_Average',\n", " 'r8c2_Cell': 'r8c2_Cell_Intensity_Average',\n", " 'r8c2_Cytoplasm': 'r8c2_Cytoplasm_Intensity_Average',\n", " 'r8c2_Nucleus': 'r8c2_Nucleus_Intensity_Average',\n", " 'Sting_Cell': 'Sting_Cell_Intensity_Average',\n", " 'Sting_Cytoplasm': 'Sting_Cytoplasm_Intensity_Average',\n", " 'Sting_Nucleus': 'Sting_Nucleus_Intensity_Average',\n", " 'Vimentin_Cell': 'Vimentin_Cell_Intensity_Average',\n", " 'Vimentin_Cytoplasm': 'Vimentin_Cytoplasm_Intensity_Average',\n", " 'Vimentin_Nucleus': 'Vimentin_Nucleus_Intensity_Average'}" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create lists of full names and shortened names to use in plotting\n", "full_to_short_names, short_to_full_names = \\\n", " shorten_feature_names(df.columns.values[~df.columns.isin(not_intensities)])\n", "\n", "short_to_full_names" ] }, { "cell_type": "code", "execution_count": 39, "id": "dfdd64a5-8705-4ed6-b94f-c42379faefd7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The full_to_short_column_names.csv file was created !\n" ] } ], "source": [ "# 
Save this data to a metadata file\n", "filename = os.path.join(metadata_dir, \"full_to_short_column_names.csv\")\n", "fh = open(filename, \"w\")\n", "fh.write(\"full_name,short_name\\n\")\n", "for k,v in full_to_short_names.items():\n", " fh.write(k + \",\" + v + \"\\n\")\n", " \n", "fh.close()\n", "print(\"The full_to_short_column_names.csv file was created !\")" ] }, { "cell_type": "code", "execution_count": 40, "id": "b4e26e25-620a-4700-b091-928c6ba08a00", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The short_to_full_column_names.csv file was created !\n" ] } ], "source": [ "# Save this data to a metadata file\n", "filename = os.path.join(metadata_dir, \"short_to_full_column_names.csv\")\n", "fh = open(filename, \"w\")\n", "fh.write(\"short_name,full_name\\n\")\n", "for k,v in short_to_full_names.items():\n", " fh.write(k + \",\" + v + \"\\n\")\n", " \n", "fh.close()\n", "print(\"The short_to_full_column_names.csv file was created !\")" ] }, { "cell_type": "markdown", "id": "4c5da27d-39a6-411b-a71f-0cd82ce2b728", "metadata": {}, "source": [ "## I.6. EXPOSURE TIME" ] }, { "cell_type": "code", "execution_count": 41, "id": "2e0c99c9-b254-41c7-b264-58441252c976", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Round Target Exp Channel\n", "0 R0 AF488 300 c2\n", "1 R0 AF555 1500 c3\n", "2 R0 AF647 1500 c4\n", "3 R0 AF750 1500 c5\n", "4 R1 ColVI 300 c2\n", "df's shape: (36, 4)\n", "\n", "No null values detected.\n" ] } ], "source": [ "# Here, we want to end up with a data structure that incorporates metadata on each intensity marker column used in our big dataframe in an easy-to-use format. 
\n", "# This is going to include the full name of the intensity marker columns in the big data frame, \n", "# the corresponding round and channel, \n", "# the target protein (e.g., CD45), \n", "# and the segmentation localization information (cell, cytoplasm, nucleus)\n", "\n", "# We can use this data structure to assign unique colors to all channels and rounds, for example, for use in later visualizations\n", "# Exposure_time file from ASHLAR analysis\n", "filename = \"Exposure_Time.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "exp_df = pd.read_csv(filename)\n", "\n", "print(exp_df.head())\n", "\n", "# Verify file imported correctly\n", "# File length\n", "print(\"df's shape: \", exp_df.shape)\n", "# Headers\n", "expected_headers =['Round','Target','Exp','Channel']\n", "compare_headers(expected_headers, exp_df.columns.values, \"Imported metadata file\")\n", "\n", "# Missingness\n", "if exp_df.isnull().any().any():\n", " print(\"\\nexp_df has null value(s) in row(s):\")\n", " print(exp_df[exp_df.isna().any(axis=1)])\n", "else:\n", " print(\"\\nNo null values detected.\")" ] }, { "cell_type": "code", "execution_count": 42, "id": "17501193-977f-4b30-bbb8-5afdae4a0356", "metadata": {}, "outputs": [], "source": [ "if len(exp_df['Target']) > len(exp_df['Target'].unique()):\n", " print(\"One or more non-unique Target values in exp_df. Currently not supported.\")\n", "exp_df = exp_df.drop_duplicates(subset = 'Target').reindex()" ] }, { "cell_type": "code", "execution_count": 43, "id": "90d1feb7-2459-4a02-a526-ddc64082345b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChannel
0R0AF488300c2
1R0AF5551500c3
2R0AF6471500c4
3R0AF7501500c5
17R4AXL1500c3
\n", "
" ], "text/plain": [ " Round Target Exp Channel\n", "0 R0 AF488 300 c2\n", "1 R0 AF555 1500 c3\n", "2 R0 AF647 1500 c4\n", "3 R0 AF750 1500 c5\n", "17 R4 AXL 1500 c3" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sort exp_df by the values in the 'Target' column in ascending order and then retrieve the first few rows of the sorted df\n", "exp_df.sort_values(by = ['Target']).head()" ] }, { "cell_type": "code", "execution_count": 44, "id": "b7373851-14bb-4813-be00-7f8c4e1f4ebf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lower
0R0AF488300c2af488
1R0AF5551500c3af555
2R0AF6471500c4af647
3R0AF7501500c5af750
4R1ColVI300c2colvi
\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower\n", "0 R0 AF488 300 c2 af488\n", "1 R0 AF555 1500 c3 af555\n", "2 R0 AF647 1500 c4 af647\n", "3 R0 AF750 1500 c5 af750\n", "4 R1 ColVI 300 c2 colvi" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create lowercase version of target\n", "exp_df['target_lower'] = exp_df['Target'].str.lower()\n", "exp_df.head()" ] }, { "cell_type": "code", "execution_count": 45, "id": "4722fdb1-cd93-45a0-b196-89d50918b2e2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_column
0AF488_Cell_Intensity_Average
1AF488_Cytoplasm_Intensity_Average
2AF488_Nucleus_Intensity_Average
3AF555_Cell_Intensity_Average
4AF555_Cytoplasm_Intensity_Average
\n", "
" ], "text/plain": [ " full_column\n", "0 AF488_Cell_Intensity_Average\n", "1 AF488_Cytoplasm_Intensity_Average\n", "2 AF488_Nucleus_Intensity_Average\n", "3 AF555_Cell_Intensity_Average\n", "4 AF555_Cytoplasm_Intensity_Average" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create df that contains marker intensity columns in our df that aren't in not_intensities\n", "intensities = pd.DataFrame({'full_column':df.columns.values[~df.columns.isin(not_intensities)]})\n", "\n", "intensities.head()" ] }, { "cell_type": "code", "execution_count": 46, "id": "f88ef6d6-8160-4fa4-a0ed-942e4127414e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_columnmarkermarker_lower
0AF488_Cell_Intensity_AverageAF488af488
1AF488_Cytoplasm_Intensity_AverageAF488af488
2AF488_Nucleus_Intensity_AverageAF488af488
3AF555_Cell_Intensity_AverageAF555af555
4AF555_Cytoplasm_Intensity_AverageAF555af555
\n", "
" ], "text/plain": [ " full_column marker marker_lower\n", "0 AF488_Cell_Intensity_Average AF488 af488\n", "1 AF488_Cytoplasm_Intensity_Average AF488 af488\n", "2 AF488_Nucleus_Intensity_Average AF488 af488\n", "3 AF555_Cell_Intensity_Average AF555 af555\n", "4 AF555_Cytoplasm_Intensity_Average AF555 af555" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Extract the marker information from the `full_column`, which corresponds to full column in big dataframe\n", "# Use regular expressions (regex) to isolate the part of the field that begins (^) with an alphanumeric value (W), and ends with an underscore (_)\n", "# '$' is end of line\n", "intensities['marker'] = intensities['full_column'].str.extract(r'([^\\W_]+)')\n", "# convert to lowercase\n", "intensities['marker_lower'] = intensities['marker'].str.lower()\n", "\n", "intensities.head()" ] }, { "cell_type": "code", "execution_count": 47, "id": "2b9ff02b-a33b-4f42-95c0-4b4132e2aa25", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_columnmarkermarker_lower
0AF488_Cell_Intensity_AverageAF488af488
1AF488_Cytoplasm_Intensity_AverageAF488af488
2AF488_Nucleus_Intensity_AverageAF488af488
3AF555_Cell_Intensity_AverageAF555af555
4AF555_Cytoplasm_Intensity_AverageAF555af555
\n", "
" ], "text/plain": [ " full_column marker marker_lower\n", "0 AF488_Cell_Intensity_Average AF488 af488\n", "1 AF488_Cytoplasm_Intensity_Average AF488 af488\n", "2 AF488_Nucleus_Intensity_Average AF488 af488\n", "3 AF555_Cell_Intensity_Average AF555 af555\n", "4 AF555_Cytoplasm_Intensity_Average AF555 af555" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Subset the intensities df to exclude any column pertaining to DAPI\n", "intensities = intensities.loc[intensities['marker_lower'] != 'dapi']\n", "\n", "intensities.head()" ] }, { "cell_type": "code", "execution_count": 48, "id": "fa1340aa-1c86-4b0f-82a2-ded1892dea6e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lowerfull_columnmarker
0R0AF488300c2af488AF488_Cell_Intensity_AverageAF488
1R0AF488300c2af488AF488_Cytoplasm_Intensity_AverageAF488
2R0AF488300c2af488AF488_Nucleus_Intensity_AverageAF488
3R0AF5551500c3af555AF555_Cell_Intensity_AverageAF555
4R0AF5551500c3af555AF555_Cytoplasm_Intensity_AverageAF555
........................
103R8Sting1000c4stingSting_Cytoplasm_Intensity_AverageSting
104R8Sting1000c4stingSting_Nucleus_Intensity_AverageSting
105R8CD11b1500c5cd11bCD11b_Cell_Intensity_AverageCD11b
106R8CD11b1500c5cd11bCD11b_Cytoplasm_Intensity_AverageCD11b
107R8CD11b1500c5cd11bCD11b_Nucleus_Intensity_AverageCD11b
\n", "

108 rows Ɨ 7 columns

\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower \\\n", "0 R0 AF488 300 c2 af488 \n", "1 R0 AF488 300 c2 af488 \n", "2 R0 AF488 300 c2 af488 \n", "3 R0 AF555 1500 c3 af555 \n", "4 R0 AF555 1500 c3 af555 \n", ".. ... ... ... ... ... \n", "103 R8 Sting 1000 c4 sting \n", "104 R8 Sting 1000 c4 sting \n", "105 R8 CD11b 1500 c5 cd11b \n", "106 R8 CD11b 1500 c5 cd11b \n", "107 R8 CD11b 1500 c5 cd11b \n", "\n", " full_column marker \n", "0 AF488_Cell_Intensity_Average AF488 \n", "1 AF488_Cytoplasm_Intensity_Average AF488 \n", "2 AF488_Nucleus_Intensity_Average AF488 \n", "3 AF555_Cell_Intensity_Average AF555 \n", "4 AF555_Cytoplasm_Intensity_Average AF555 \n", ".. ... ... \n", "103 Sting_Cytoplasm_Intensity_Average Sting \n", "104 Sting_Nucleus_Intensity_Average Sting \n", "105 CD11b_Cell_Intensity_Average CD11b \n", "106 CD11b_Cytoplasm_Intensity_Average CD11b \n", "107 CD11b_Nucleus_Intensity_Average CD11b \n", "\n", "[108 rows x 7 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Merge the intensities andexp_df together to create metadata\n", "metadata = pd.merge(exp_df, intensities, how = 'left', left_on = 'target_lower',right_on = 'marker_lower')\n", "metadata = metadata.drop(columns = ['marker_lower'])\n", "metadata = metadata.dropna()\n", "\n", "# Target is the capitalization from the Exposure_Time.csv\n", "# target_lower is Target in small caps\n", "# marker is the extracted first component of the full column in segmentation data, with corresponding capitalization\n", "metadata" ] }, { "cell_type": "code", "execution_count": 49, "id": "75378ca2-3ecb-4c60-a7d4-da997f851066", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lowerfull_columnmarkerlocalisation
0R0AF488300c2af488AF488_Cell_Intensity_AverageAF488cell
1R0AF488300c2af488AF488_Cytoplasm_Intensity_AverageAF488cytoplasm
2R0AF488300c2af488AF488_Nucleus_Intensity_AverageAF488nucleus
3R0AF5551500c3af555AF555_Cell_Intensity_AverageAF555cell
4R0AF5551500c3af555AF555_Cytoplasm_Intensity_AverageAF555cytoplasm
...........................
103R8Sting1000c4stingSting_Cytoplasm_Intensity_AverageStingcytoplasm
104R8Sting1000c4stingSting_Nucleus_Intensity_AverageStingnucleus
105R8CD11b1500c5cd11bCD11b_Cell_Intensity_AverageCD11bcell
106R8CD11b1500c5cd11bCD11b_Cytoplasm_Intensity_AverageCD11bcytoplasm
107R8CD11b1500c5cd11bCD11b_Nucleus_Intensity_AverageCD11bnucleus
\n", "

108 rows Ɨ 8 columns

\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower \\\n", "0 R0 AF488 300 c2 af488 \n", "1 R0 AF488 300 c2 af488 \n", "2 R0 AF488 300 c2 af488 \n", "3 R0 AF555 1500 c3 af555 \n", "4 R0 AF555 1500 c3 af555 \n", ".. ... ... ... ... ... \n", "103 R8 Sting 1000 c4 sting \n", "104 R8 Sting 1000 c4 sting \n", "105 R8 CD11b 1500 c5 cd11b \n", "106 R8 CD11b 1500 c5 cd11b \n", "107 R8 CD11b 1500 c5 cd11b \n", "\n", " full_column marker localisation \n", "0 AF488_Cell_Intensity_Average AF488 cell \n", "1 AF488_Cytoplasm_Intensity_Average AF488 cytoplasm \n", "2 AF488_Nucleus_Intensity_Average AF488 nucleus \n", "3 AF555_Cell_Intensity_Average AF555 cell \n", "4 AF555_Cytoplasm_Intensity_Average AF555 cytoplasm \n", ".. ... ... ... \n", "103 Sting_Cytoplasm_Intensity_Average Sting cytoplasm \n", "104 Sting_Nucleus_Intensity_Average Sting nucleus \n", "105 CD11b_Cell_Intensity_Average CD11b cell \n", "106 CD11b_Cytoplasm_Intensity_Average CD11b cytoplasm \n", "107 CD11b_Nucleus_Intensity_Average CD11b nucleus \n", "\n", "[108 rows x 8 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add a column to signify marker target localisation.\n", "# Use a lambda to determine segmented location of intensity marker column and update metadata accordingly\n", "# Using the add_metadata_location() function in my_modules.py\n", "metadata['localisation'] = metadata.apply(\n", " lambda row: add_metadata_location(row), axis = 1)\n", "\n", "metadata" ] }, { "cell_type": "code", "execution_count": 50, "id": "9779567d-bd63-468a-acdb-e30901c71e03", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The marker_intensity_metadata.csv file was created !\n" ] } ], "source": [ "# Save this data structure to the metadata folder\n", "# don't want to add color in because that's better off treating color the same for round, channel, and sample\n", "filename = \"marker_intensity_metadata.csv\"\n", "filename = 
os.path.join(metadata_dir, filename)\n", "\n", "metadata.to_csv(filename, index = False)\n", "print(\"The marker_intensity_metadata.csv file was created !\")" ] }, { "cell_type": "markdown", "id": "d371ebc6-4aee-4b43-914e-933e3f9abbd5", "metadata": {}, "source": [ "## I.7. COLORS WORKFLOW" ] }, { "cell_type": "markdown", "id": "0f4e010e-149b-4fe9-8a58-7f9f872d2be9", "metadata": {}, "source": [ "### I.7.1. CHANNELS COLORS" ] }, { "cell_type": "code", "execution_count": 51, "id": "fb818967-a0b7-4bd0-9b81-32fd1bfefde1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Unique channels are: ['c2' 'c3' 'c4' 'c5']\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUoAAABlCAYAAAArpKpSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAACs0lEQVR4nO3ZQWoTYRzG4X9CawI6CXQZEjc9hSdw4QW8gndQ2nt5Affuu+hADtC0UKR0XIgKgnmnqWEa+zzrb/HyBX7MZEZd13UFwF+Nhx4A8NQJJUAglACBUAIEQgkQCCVAIJQAwVGfQ/f397Ver6tpmhqNRvveBLB3XdfVZrOpxWJR4/H2Z8ZeoVyv17Varf7JOICnpG3bWi6XW8/0CmXTNFVVdf71vKbN9PHLnokPnz4PPeEwvf849IKD8+718dATDs7d9U19efP2V9+26RXKn6/b02Za05lQ9jV70et6+dPLV0MvODhHjVDuqs/fiT7mAARCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRHfQ51XVdVVbeb272O+d9cfbsbesJhurkeesHBudscDz3h4Nxd31TV775tM+p6nLq4uKjT09PHLwN4Ytq2reVyufVMryfKk5OTqqq6vLys+Xz++GXPxNXVVa1Wq2rbtmaz2dBzDoI72417e7iu62qz2dRisYhne4VyPP7xV+Z8Pvcj7GA2m7m3B3Jnu3FvD9P3wc/HHIBAKAGCXqGcTCZ1dnZWk8lk33v+K+7t4dzZbtzbfvX66g3wnHn1BgiEEiAQSoBAKAECoQQIhBIgEEqAQCgBgu+VwWR6hdDVcgAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUoAAABlCAYAAAArpKpSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAACpklEQVR4nO3Zv2rbUBjG4WNj0FLLkNHYU4bupXTp2KV3lYvrPXTMEIHnYgVKIfh0CKFd6ld2ahQlz7PYwxk+Ppsf+jOrtdYCwD/Nxx4A4KUTSoBAKAECoQQIhBIgEEqAQCgBgsWQQ4fDoex2u7JcLstsNrv0TAAXV2stfd+X9Xpd5vPj14yDQrnb7cp2u/0vwwG8JF3Xlc1mc/TMoFAul8vHL5+6Mlu0zx7srfjxeTX2CJP0/uvHsUeYnG9fvo89wuT0pZYP5eefvh0xKJRPt9uzRSuUJ2ibsSeYpvm7QX9L/rIsHomda8jjRC9zAAKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAQKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAQKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAQKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAQKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAQKhBAiEEiAQSoBAKAECoQQIhBIgEEqAQCgBAqEECIQSIBBKgEAoAYLFkEO11sfPh/1Fh3lt9r/GnmCaDvcPY48wOX2pY48wOU87e+rbMbM64NTt7W25vr5+/mQAL0zXdWWz2Rw9M+iK8urqqpRSyt3dXVmtVs+f7I3Y7/dlu92WrutK27ZjjzMJdnYeeztdrbX0fV/W63U8OyiU8/njo8zVauVHOEPbtvZ2Ijs7j72dZuiFn5c5AIFQAgSDQtk0Tbm5uSlN01x6nlfF3k5nZ+ext8sa9NYb4C1z6w0QCCVAIJQAgVACBEIJEAglQCCUAIFQAgS/AVPOYnIQS7DnAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# we want colors that are categorical, since Channel is a non-ordered category (yes, they are numbered, but arbitrarily). \n", "# A categorical color palette will have dissimilar colors.\n", "# Get those unique colors\n", "if len(metadata.Channel.unique()) > 10:\n", " print(\"WARNING: There are more unique channel values than \\\n", " there are colors to choose from. Select different palette, e.g., \\\n", " continuous palette 'husl'.\")\n", "channel_color_values = sb.color_palette(\"bright\",n_colors = len(metadata.Channel.unique()))\n", "# chose 'colorblind' because it is categorical and we're unlikely to have > 10\n", "\n", "# You can customize the colors for each channel here\n", "custom_colors = {\n", " 'c2': 'lightgreen',\n", " 'c3': 'tomato',\n", " 'c4': 'pink',\n", " 'c5': 'turquoise'\n", "}\n", "\n", "custom_colors_values = sb.palplot(sb.color_palette([custom_colors.get(ch, 'blue') for ch in metadata.Channel.unique()]))\n", "\n", "# Display those unique customs colors\n", "print(\"Unique channels are:\", metadata.Channel.unique())\n", "sb.palplot(sb.color_palette(channel_color_values))" ] }, { "cell_type": "code", "execution_count": 52, "id": "e7aa9d2e-94b5-461b-859f-fadfa2ccc0c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'c2': array([0.00784314, 0.24313725, 1. ]),\n", " 'c3': array([1. , 0.48627451, 0. ]),\n", " 'c4': array([0.10196078, 0.78823529, 0.21960784]),\n", " 'c5': array([0.90980392, 0. 
, 0.04313725])}" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pair each channel with its palette color, converting every RGB value with np.float64 on the way in\n", "channel_color_dict = {}\n", "for k, v in zip(metadata.Channel.unique(), channel_color_values):\n", "    channel_color_dict[k] = np.float64(v)\n", "\n", "channel_color_dict" ] }, { "cell_type": "code", "execution_count": 53, "id": "aa2d6cf5-2126-4df4-a54f-c6926c2c06a1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rgbhexChannel
c2(0.00784313725490196, 0.24313725490196078, 1.0)#023effc2
c3(1.0, 0.48627450980392156, 0.0)#ff7c00c3
c4(0.10196078431372549, 0.788235294117647, 0.219...#1ac938c4
c5(0.9098039215686274, 0.0, 0.043137254901960784)#e8000bc5
\n", "
" ], "text/plain": [ " rgb hex Channel\n", "c2 (0.00784313725490196, 0.24313725490196078, 1.0) #023eff c2\n", "c3 (1.0, 0.48627450980392156, 0.0) #ff7c00 c3\n", "c4 (0.10196078431372549, 0.788235294117647, 0.219... #1ac938 c4\n", "c5 (0.9098039215686274, 0.0, 0.043137254901960784) #e8000b c5" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "color_df_channel = color_dict_to_df(channel_color_dict, \"Channel\")\n", "\n", "# Save to file in metadatadirectory\n", "filename = \"channel_color_data.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "color_df_channel.to_csv(filename, index = False)\n", "\n", "color_df_channel" ] }, { "cell_type": "code", "execution_count": 54, "id": "d8117d2d-c60e-477c-bef4-dc89d18fa6aa", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGEAAACHCAYAAAAGGzc8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAK6UlEQVR4nO2df0yU9R/AXxci3MHlnUdw51cPUZCZxRwxEWT7eqsWtSVlGH/owmksgpWjlkQGrkwXCmtqRaUt+8OWbDrrjww2dWpbJ2ZgP0ALAisJz2JUwol293z/+MbNizg4eO78yPN5bW733D3P5/3x8+LzfJ77vJ/PczpFURQkN5RbbnQFJFKCEEgJAiAlCICUIABSggBICQIgJQiAlCAAUoIASAkCICUIgJQgAFKCAEwZ74Eej4dr166pWZebjsjISCIiIiZcTtASFEWhp6eHvr6+CQefDJhMJqxWKzqdbtxlBC1hSEB8fDwGg2FCwW9mFEVhYGAAl8sFgM1mG3dZQUnweDw+ARaLZdxBJwt6vR4Al8tFfHz8uE9NQQ3MQ2OAwWAYV7DJyFBbTGR8HNfVkVZPQf+GGm0hL1EFQEoQgBsiQafTcfDgwRsROmhWr17NQw89FNIYIZHQ09PDU089xZw5c4iKimLWrFk8+OCDHD58OBThbnrG/Y15JLq6uliyZAkmk4mtW7eSlpbGtWvXaGhooLS0lLNnz6od8qZH9Z5QUlKCTqejqamJ/Px85s2bx4IFC3jmmWdwOp2+/X799VcefvhhDAYDKSkpfPzxx77PPB4Pa9euJSkpCb1eT2pqKtu3b/eLM3SaqKmpwWazYbFYKC0t9btUnD17Nlu2bGHNmjUYjUbsdjvvvPOOXzkXLlygoKAAs9mMxWIhLy+Prq4utZslIKpK6O3t5dNPP6W0tJSYmJhhn5tMJt/rl156iUcffZSvvvqKBx54gJUrV9Lb2wuA1+tl5syZ1NfX09raSlVVFS+88AL19fV+5R09epSOjg6OHj3K+++/z549e9izZ4/fPrW1tWRkZNDc3ExJSQlPPvmkrzcODAzgcDiIjY3l+PHjfPbZZ8TGxpKbm8vVq1fVbJrAKEHgdruV1tZWxe12/+vnJ0+eVADlwIEDAcsBlBdffNG3ffnyZUWn0ymHDh0a8ZiSk
hLlkUce8W0XFhYqiYmJyl9//eV7b8WKFUpBQYFvOzExUVm1apVv2+v1KvHx8UpdXZ2iKIry7rvvKqmpqYrX6/XtMzg4qOj1eqWhocEXJy8vb8R6jdYmY0HVMUH5+97isXyBSUtL872OiYnBaDT65mEA3nrrLXbv3s358+dxu91cvXqVhQsX+pWxYMECv6kCm83G119/PWIcnU6H1Wr1xTl9+jTt7e0YjUa/Y65cuUJHR8eo/we1UFVCSkoKOp2Otra2US/rIiMj/bZ1Oh1erxeA+vp6ysrKqK2tJSsrC6PRyLZt2zh58uSYyxjLPl6vl7vuuou9e/cOq99tt90WsP5qoqqE6dOnc9999/HGG2/w9NNPDxsX+vr6/MaFkThx4gTZ2dmUlJT43gvFX2Z6ejr79u0jPj6eW2+9VfXyx4rqV0dvvvkmHo+HRYsWsX//fr7//nva2trYsWMHWVlZYyojOTmZL774goaGBr777jsqKys5deqU2lVl5cqVxMXFkZeXx4kTJ+js7OTYsWOsW7eOn3/+WfV4I6G6hKSkJL788kscDgfPPvssd9xxB/feey+HDx+mrq5uTGUUFxezfPlyCgoKyMzM5LfffvPrFWphMBg4fvw4drud5cuXM3/+fNasWYPb7Q5rz9ApythX6ly5coXOzk6SkpKIjo4OZb1uGtRoEzmBJwBSggBICQIgJQiAlCAAUoIASAkCICUIgJQgAKpO4EX8V83SRsdzTJ1yurq62LRpE0eOHKGnp4cZM2awatUqNmzYwNSpU9UJEgDVc8w3I2fPnsXr9fL222+TnJzMN998Q1FREf39/dTU1IQ8vqYkeL1etm3bxq5du/jpp59ISEjgiSeeYMOGDeTm5vr2mzNnDufOnaOurk5KUJuKigp27drFa6+9Rk5ODr/88suId3/8/vvvTJ8+PSz10oyEP//8k+3bt/P6669TWFgIwNy5c8nJyRm2b0dHBzt37qS2tjYsddPM1VFbWxuDg4PcfffdAffr7u4mNzeXFStW8Pjjj4elbpqRMLSWIBDd3d04HA6ysrKG3Z8USjQjISUlBb1eP+KtmBcuXGDp0qWkp6fz3nvvccst4WsazYwJ0dHRlJeXs379eqZOncqSJUu4dOkS3377Lffffz9Lly7FbrdTU1PDpUuXfMdZrdaQ100zEgAqKyuZMmUKVVVVdHd3Y7PZKC4uprGxkfb2dtrb25k5c6bfMUFkf8eNzDFPEJljniRICQIgJQiAlCAAUoIASAkCICUIgJQgAFKCAEgJAqDu3FFlmB88skm9eZ1ly5bR0tKCy+XCbDZzzz33UF1dzYwZM1SLMRKyJ/yNw+Ggvr6ec+fOsX//fjo6OsjPzw9LbE1J8Hq9VFdXk5ycTFRUFHa7nc2bNwNQVlbG4sWLSUxMJDs7m+effx6n0xmW5/xpaip7rIn+3t5e9u7dS3Z29rDVn6FAMz1hKNG/detWCgsLfUn+6/PI5eXlxMTEYLFY+PHHH/noo4/CUjfNSBhLov+5556jubmZxsZGIiIieOyxx8KS1NHM6Wgsif64uDji4uKYN28e8+fPZ9asWTidzjEv/R0vmukJoyX6/8lQDxgcHAxltQAN9YRAif4777yTpqYmcnJyMJvN/PDDD1RVVTF37tyQ9wLQkAQYOdGv1+s5cOAAGzdupL+/H5vNRm5uLh9++CFRUVEhr5dM9E8QmeifJEgJAiAlCICUIABSggBICQIgJQiAlCAAUoIASAkCoOrc0X+aQj/ZdT0XFn2uepmDg4NkZmZy5swZmpubhz0QNxTInvAP1q9fH5Y7LK5HUxICJfoBDh06RGNjY1hW8V+PpqayAyX6L168SFFREQcPHgz7r2dpRkKgFf2KorB69WqKi4vJyMi4uX8/QWQCJfp37tzJH3/8QUVFxQ2omYYkBEr0HzlyBKfTSVRUFFOmTCE5ORmAjIwMX68JJZqRECjRv2PHDs6cOUNLSwstLS188sknAOzbt
89v4A4VmhkTAiX6165d67dvbGws8P8x45+Ly0OBZiTAyIn+G41M9E8QmeifJEgJAiAlCICUIABSggBICQIgJQiAlCAAUoIASAkCoOrc0UVdrJrFjUqCclm1smbPns358+f93isvL+fVV19VLcZIaGoCbzRefvllioqKfNtDs6mhRlOno9ES/UajEavV6vsnJYSAiooKqqurqayspLW1lQ8++ICEhATf59XV1VgsFhYuXMjmzZvD9hPxmjkdjfbo/nXr1pGeno7ZbKapqYmKigo6OzvZvXt3yOumGQmjregvKyvzvU5LS8NsNpOfn+/rHaFEM6ejsazov57FixcD0N7eHorq+KEZCcGu6G9ubgbAZrOFslqAhk5HgRL9t99+O06nE4fDwbRp0zh16hRlZWUsW7YMu90e+sopQeB2u5XW1lbF7XYHc5gweDwe5ZVXXlESExOVyMhIxW63K1u2bFFOnz6tZGZmKtOmTVOio6OV1NRUZePGjUp/f/+oZarRJjLRP0Fkon+SICUIgJQgAFKCAIxLQhBj+aRHjbYISsLQ4ykHBgYmHHiyMNQWE3l0Z1Bf1iIiIjCZTLhcLgAMBgM6XZgfzSwIiqIwMDCAy+XCZDIREREx7rKC+p4wFLynp4e+vr5xB51MmEwmrFbrhP4Yg5YwhMfjCcsjjEUmMjJyQj1giHFLkKiHvEQVAClBAKQEAZASBEBKEAApQQCkBAH4HyylbgTqTcAfAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Legend of channel info only\n", "g = plt.figure(figsize = (1,1)).add_subplot(111)\n", "g.axis('off')\n", "handles = []\n", "for item in channel_color_dict.keys():\n", " h = g.bar(0,0, color = channel_color_dict[item],\n", " label = item, linewidth =0)\n", " handles.append(h)\n", "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Channel'),\n", " # bbox_to_anchor=(10,10), \n", " # bbox_transform=plt.gcf().transFigure)\n", "\n", "filename = \"Channel_legend.png\"\n", "filename = os.path.join(metadata_images_dir, filename)\n", "plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "markdown", "id": "b5d587ec-809c-4312-acda-3747fc63fd83", "metadata": {}, "source": [ "### I.7.2. ROUNDS COLORS" ] }, { "cell_type": "code", "execution_count": 55, "id": "40bedb40-5740-4956-aa97-a95ac9fb341a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['R0' 'R1' 'R2' 'R3' 'R4' 'R5' 'R6' 'R7' 'R8']\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAs0AAABlCAYAAAC2n94rAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAD40lEQVR4nO3cwU5jBRiG4b+lUBDbzmAmUUJNTLwDvQB3XoAXxK25NvEipAk7NpQKQeG4MGqchPnOOQ6eOcPzbGnIn4+SvpDCpGmapgAAgCdNhz4AAAA+dKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQzNo86PHxsS4vL2uxWNRkMnnumwAA4H/RNE1tt9s6PT2t6fTp3ye3iubLy8tar9fv7TgAAPiQbDabOjs7e/LjraJ5sVhUVdV3629qNt17P5e9AJ+9/nroE0bp+M1XQ58wOvt+qO1l7+z10CeM0uHZ0dAnjM6rL1q93PKWz9/YrasvX30y9Amjs7vZ1Q/ffv937z6l1bPxr7dkzKZ7tT/1BG7rYO9g6BNG6WD/cOgTRmf/QMT0MTv04tLH/MhuXR0ee+3s4+hTu3V1vDge+oTRSm9B9oeAAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQiGYAAAhEMwAABKIZAAAC0QwAAIFoBgCAQDQDAEAgmgEAIBDNAAAQzNo8qGmaqqr6/fHhWY/52Nw/3A99wijt/3Y39Amj09zfDn3CKD3c/Tr0CaM0uW2GPmF07natXm55y+2N3brazXx/drW72VXVP737lFbPxqurq6qq+nHz838864X55aehLwAAoIXtdlur1erJj7eK5pOTk6qquri4eOcn49+ur69rvV7XZrOp5XI59DmjYLN+7NadzfqxW3c268du3dmsn6Zparvd1unp6Tsf1yqap9M/3/q8Wq18EXpYLpd268hm/ditO5v1Y7fubNaP3bqzWXdtfinsDwEBACAQzQAAELSK5vl8Xufn5zWfz5/7no+K3bqzWT92685m/ditO5v1Y7fubPa8Jk36/xoAAPDCeXsGAAAEohkAAALRDAAAgWgGAIBANAMAQCCaAQAgEM0AABCIZgAACP4AgRCVeH0RRYAAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# we want colors that are sequential, since Round is an ordered category. \n", "# We can still generate colors that are easy to distinguish. Also, many of the categorical palettes cap at at about 10 or so unique colors, and repeat from there. \n", "# We do not want any repeats!\n", "round_color_values = sb.cubehelix_palette(\n", " len(metadata.Round.unique()), start=1, rot= -0.75, dark=0.19, light=.85, reverse=True)\n", "# round_color_values = sb.color_palette(\"cubehelix\",n_colors = len(metadata.Round.unique()))\n", "# chose 'cubehelix' because it is sequential, and round is a continuous process\n", "# each color value is a tuple of three values: (R, G, B)\n", "print(metadata.Round.unique())\n", "\n", "sb.palplot(sb.color_palette(round_color_values))\n", "\n", "## TO-DO: write what these parameters mean" ] }, { "cell_type": "code", "execution_count": 56, "id": "4372b4f0-2242-4d76-8881-a78fba5fd814", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'R0': array([0.28685356, 0.13009829, 0.23110332]),\n", " 'R1': array([0.36541462, 0.2025447 , 0.3769331 ]),\n", " 'R2': array([0.40867533, 0.29407612, 0.51667119]),\n", " 'R3': array([0.42890614, 0.40822902, 0.63353489]),\n", " 'R4': array([0.44444629, 0.5264665 , 0.70563219]),\n", " 'R5': array([0.47707206, 0.64270618, 0.74184779]),\n", " 'R6': array([0.54144549, 0.74667592, 0.75729058]),\n", " 'R7': array([0.64147101, 0.83215511, 0.7746773 ]),\n", " 'R8': array([0.76842569, 0.89926671, 0.81713833])}" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Store in a dictionary\n", "round_color_dict = dict(zip(metadata.Round.unique(), round_color_values))\n", "\n", "for k,v in round_color_dict.items():\n", " round_color_dict[k] = np.float64(v)\n", "\n", "round_color_dict" ] }, { "cell_type": "code", "execution_count": 57, "id": "3a6053ce-d87c-4137-8c70-a6772208fc37", "metadata": {}, "outputs": [ { 
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rgbhexRound
R0(0.28685356234627135, 0.13009829239513535, 0.2...#49213bR0
R1(0.36541462435986094, 0.2025447048359916, 0.37...#5d3460R1
R2(0.40867533458903105, 0.2940761173840091, 0.51...#684b84R2
R3(0.42890613750051265, 0.4082290173220481, 0.63...#6d68a2R3
R4(0.4444462906865238, 0.5264664993764805, 0.705...#7186b4R4
R5(0.47707206309601013, 0.6427061780374552, 0.74...#7aa4bdR5
R6(0.5414454866716836, 0.7466759172596551, 0.757...#8abec1R6
R7(0.6414710091647722, 0.8321551072276492, 0.774...#a4d4c6R7
R8(0.7684256891219349, 0.8992667116749021, 0.817...#c4e5d0R8
\n", "
" ], "text/plain": [ " rgb hex Round\n", "R0 (0.28685356234627135, 0.13009829239513535, 0.2... #49213b R0\n", "R1 (0.36541462435986094, 0.2025447048359916, 0.37... #5d3460 R1\n", "R2 (0.40867533458903105, 0.2940761173840091, 0.51... #684b84 R2\n", "R3 (0.42890613750051265, 0.4082290173220481, 0.63... #6d68a2 R3\n", "R4 (0.4444462906865238, 0.5264664993764805, 0.705... #7186b4 R4\n", "R5 (0.47707206309601013, 0.6427061780374552, 0.74... #7aa4bd R5\n", "R6 (0.5414454866716836, 0.7466759172596551, 0.757... #8abec1 R6\n", "R7 (0.6414710091647722, 0.8321551072276492, 0.774... #a4d4c6 R7\n", "R8 (0.7684256891219349, 0.8992667116749021, 0.817... #c4e5d0 R8" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "color_df_round = color_dict_to_df(round_color_dict, \"Round\")\n", "\n", "# Save to file in metadatadirectory\n", "filename = \"round_color_data.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "color_df_round.to_csv(filename, index = False)\n", "\n", "color_df_round" ] }, { "cell_type": "code", "execution_count": 58, "id": "977a7e45-64df-4bd6-ab21-cecd44d76fd9", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAGEAAADwCAYAAAAQPApFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAQjElEQVR4nO2dfUxT1//H37WAUsjoAvI0cDIgCIqAD1sGi9OvS0HchLlBs+gm6JzDTKcozvkQfJ7Ll4fN+LBsAk6HDwtLF9xvU0E3lR8MHVCmc4qADJNCmgwUtMh4ON8/SLtSlbZwbzlyzishsb099356X55z2/vuuVdCCCHgDCujhrsADpdABVwCBXAJFMAlUACXQAFcAgVwCRTAJVAAl0ABXAIFcAkUwCVQAJdAAXaDbdjT04Ouri4ha3nisLe3h1QqHfJ6rJZACEFzczPu3Lkz5I2PBORyOTw9PSGRSAa9Dqsl6AW4u7tDJpMNaeNPMoQQ6HQ6aLVaAICXl9eg12WVhJ6eHoMAV1fXQW90pODo6AgA0Gq1cHd3H/TQZNWBWX8MkMlkg9rYSES/L4ZyfBzUpyNWh6BHIcS+4B9RKYBLoAAuAUBSUhLi4+OHbfs2k5CUlASJRAKJRAI7OzuMGzcOKSkpaG1ttVUJ1GLTnhATE4OmpiY0NDTg4MGDOHnyJJYvX27LEqjEphJGjx4NT09P+Pj4QKFQQKlU4syZMwCA3t5ebNu2DT4+Phg9ejTCw8Nx6tQpQ9tffvkFEomk3zd1tVoNiUSChoYGAMChQ4cgl8tx+vRpBAcHw9nZ2SBeT09PD1JTUyGXy+Hq6op169ZhuH+EOGzHhPr6epw6dQr29vYAgM8//xyZmZnIyMjA77//jujoaMybNw83b960ar06nQ4ZGRk4cuQILly4gMbGRqxdu9awPDMzE7m5ucjJyUFJSQlaWlqgUqkEfW9WQ6ygo6ODXLt2jXR0dFjTjBBCyKJFi4hUKiVOTk5kzJgxBAABQLKysgghhHh7e5OdO3f2azN9+nSyfPlyQgghP//8MwFAWltbDcurqqoIAHLr1i1CCCF5eXkEAKmtrTW8Zt++fcTDw8Pw2MvLi+zevdvwuKuri/j4+JC4uDir3xMhQ9snegZ9FnUwzJo1CwcOHIBOp8PBgwdRU1ODFStWoK2tDRqNBlFRUf1eHxUVherqaqu2IZPJ4O/vb3js5eVlOL9z9+5dNDU14cUXXzQst7Ozw7Rp04Z1SLLpcOTk5ISAgABMnjwZe/bsQWdnJ7Zu3WpYbvrtkxBieG7UqFGG5/Q86lSBfngzXudw7mBLGNbvCenp6cjIyMC9e/fg7e2NkpKSfstLS0sRHBwMABg7diwA9DvIqtVqq7bn4uICLy8v/Prrr4bnuru7UVFRMch3IAw2HY5MmTlzJiZOnIhdu3YhLS0N6enp8Pf3R3h4OPLy8qBWq5Gfnw8ACAgIgK+vL7Zs2YIdO3bg5s2byMzMtHqbH374IXbv3o3AwEAEBwcjKytr2LORYZUAAKmpqUhOTkZNTQ3a2tqwZs0aaLVahISEoLCwEIGBgQD6hpljx44hJSUFYWFhmD59Onbs2IGEhASrtrdmzRo0NTUhKSkJo0aNwuLFi/H666/j7t27Yrw9i5AQKwbMBw8e4NatW/Dz88OYMWPErOuJQYh9ws8dUQCXQAFcAgVwCRTAJVAAl0ABXAIFcAkUwCVQgCCnLWKfe0mI1VjMj/Ul5l9kQlJSEr7++msAgFQqhbe3N+bOnYtdu3bh6aefBgB0dnZi7dq1OHbsGDo6OjB79mzs378fPj4+gtZvClM9wVzGvWrVKqhUKhw/fhwlJSW4d+8eXn31VfT09Iha17CfwLMl+owbAHx8fKBUKnHo0CEAfYFPTk4Ojhw5gldeeQUA8M0338DX1xfFxcWIjo4WrS6meoIxphl3RUUFurq6oFAoDK/x9vbGpEmTUFpaKmotTPWEH374Ac7Ozujp6cGDBw8AAFl
ZWQD6fvLv4OBgOD7o8fDwQHNzs6h1MSXhcRn3QBhHrGLB1HA0UMbt6emJf/7556FfBGq1Wnh4eIhaF1MSTNFn3BqNBlOnToW9vT2KiooMy5uamnD16lVERkaKWgfTEowzbhcXFyxZsgRr1qzB2bNnUVVVhYULFyI0NNTwaUksmJYA9GXcX331FW7fvo3s7GzEx8cjMTERUVFRkMlkOHnypCAzNAeCZ8xDhGfMIwQugQK4BArgEiiAS6AALoECuAQK4BIogEugAC6BAgTJE96ZnizEaizm8OU8q9tYEvR/+eWXOHr0KCorK9He3o7W1lbI5XIhS38kTPUEc0G/TqdDTEwMNmzYYNO6mErWBgr6gb5fWwB9E9dtCVM9wRjToH84YaonDBT0DydMSRhM0G8LmBqOzE1mHy6YkmCKcdA/nDAtwTjoB/p+AKZWq1FbWwsAuHLlCtRqNVpaWkStg2kJQP+g/4svvkBERASWLl0KAJgxYwYiIiJQWFgoag086B8iPOgfIXAJFMAlUACXQAFcAgVwCRTAJVAAl0ABXAIFcAkUIEiekPLqTiFWYzEHfthodRtzQX9LSwvS09Nx5swZ3L59G25uboiPj8f27dvh4uIi9FvoB1OhTkxMDPLy8tDd3Y1r165h8eLFuHPnDo4dOwaNRgONRoOMjAyEhITgr7/+wvvvvw+NRoOCggJR62JKwkBB/6RJk/Ddd98ZXuvv74+dO3di4cKF6O7uhp2deLuK2WOCJUH/3bt38dRTT4kqAGCsJ1gT9P/999/Yvn07li1bJnpdTEmwNOhva2vD3LlzERISgvT0dNHrYmo4siTob29vR0xMDJydnaFSqWzyuySmJJhiGvS3tbVBoVDAwcEBhYWFNksPmZZgHPS3t7dDoVDg/v37yMnJQVtbG5qbm9Hc3MwvOiU2+qvWv/DCCygvLwfQd5sAY27duoXx48eLVgMP+ocID/pHCFwCBXAJFMAlUACXQAFcAgVwCRTAJVAAl0ABXAIFCHLuaGXKESFWYzF7DrxtdRtLZvQvW7YMxcXF0Gg0cHZ2RmRkJD799FNMmDBB0PpNYaonmJvRP3XqVOTl5eHPP//E6dOnQQiBQqHgZ1GFxNyM/vfee8/w7/Hjx2PHjh0ICwtDQ0NDv3s8Cw1TPcEYc0H//fv3kZeXBz8/P/j6+opaC1M9wZKgf//+/Vi3bh3u37+PCRMmoKioCA4ODqLWxVRPmDVrFtRqNcrLy7FixQpER0c/FPQvWLAAVVVVOH/+PAIDA5GYmGgQJhZMSbAk6HdxcUFgYCBmzJiBgoICXL9+HSqVStS6mJJgiiUz+gkh6OzsFLUOpiUYB/319fX45JNPUFFRgcbGRpSVlSExMRGOjo6IjY0VtQ6mJQD/zuiXSqW4ePEiYmNjERAQgMTERDg5OaG0tBTu7u6i1sCD/iHCg/4RApdAAVwCBXAJFMAlUACXQAFcAgVwCRTAJVAAl0ABgoQ6qZ/8nxCrsZisj+da3caSoF8PIQSxsbE4deoUVCoV4uPjhSj7sTDVE8wF/Xo+++wz0e/BbAxT8aa5oB8AqqurkZWVhcuXL8PLy8smdTHVE4x5VNCv0+nw1ltvYe/evQZZtoCpnmAu6F+9ejUiIyMRFxdn07qYkjDQjP7CwkKcO3cOVVVVNq+LqeFooKD/3LlzqKurg1wuh52dneGiIm+88QZmzpwpal1M9QRT0tPTMWfOHKSkpGD9+vV49913+y0PDQ1FdnY2XnvtNVHrYFqCcdD/uIPxuHHj4OfnJ2odTA1Hj8L40v3DBQ/6hwgP+kcIXAIFcAkUwCVQAJdAAVwCBXAJFMAlUACXQAFcAgUIcgJvY/45IVZjMTsX/MfqNpYE/TNnzsT58+f7tVMqlTh+/PjQix4Aps6iDnTpfj1Lly7Ftm3bDI8dHR1Fr4spCZYE/TKZzKb5MsDwMeFxM/rz8/Ph5uaGiRMnYu3atWhvbxe9FqZ6grmgf8GCBfDz84O
npyeuXr2Kjz/+GNXV1SgqKhK1LqYkmLt0v/4+zEDfnUUCAwMxbdo0VFZWYsqUKaLVxdRwZMmMfmOmTJkCe3t73Lx5U9S6mJJgirkZ/X/88Qe6urpE/yUe0xKMg/66ujps27YNv/32GxoaGvDjjz8iISEBERERiIqKErUOpiUA/wb9Dg4OOHv2LKKjoxEUFISVK1dCoVCguLgYUqlU1Bp40D9EeNA/QuASKIBLoAAugQK4BArgEiiAS6AALoECuAQK4BIoQJA84b8/XxBiNRaTNmuG1W0sndFfVlaGjRs3ory8HPb29ggPD8dPP/0katbMVE8wN6O/rKwMMTExUCgUuHTpEi5fvowPPvgAo0aJu5uYStbMBf2rV6/GypUrsX79esNzgYGBotfFVE8wxjTo12q1KC8vh7u7OyIjI+Hh4YGXX34ZJSUlotfCVE8YKOivr68HAGzZsgUZGRkIDw/H4cOHMXv2bFy9elXUHsFUTxjo0v29vb0A+u6rk5ycjIiICGRnZyMoKAi5ubmi1sWUhIGCfn2OHBIS0q9NcHAwGhsbRa2LKQmmGAf948ePh7e3N27cuNHvNTU1NXj22WdFrYNpCcZBv0QiQVpaGvbs2YOCggLU1tZi8+bNuH79OpYsWSJqHUwdmB9FamoqkpOT8dFHH2HVqlV48OABVq9ejZaWFoSFhaGoqEjUO0sBPOgfMjzoHyFwCRTAJVAAl0ABXAIFcAkUwCVQAJdAAVwCBXAJFCDIuaP8P8qEWI3FLJj4otVtzAX9DQ0Nj7305rfffouEhIQh1TwQTPWEgYJ+X19fNDU19fvbunUrnJycMGfOHFHrYuos6kBBv1QqfWgmv0qlglKphLOzs6h1MdUTjHncjH49FRUVUKvVomcJAGM9wdyMfmNycnIQHByMyMhI0etiSoK5Gf16Ojo6cPToUWzevNkmdTE1HFk6o7+goAA6nQ7vvPOOTepiSoIpj5vRn5OTg3nz5mHs2LE2qYNpCcZBv57a2lpcuHDhoXspiAnTEoCHL92fm5uLZ555BgqFwmY18KB/iPCgf4TAJVAAl0ABXAIFcAkUwCVQAJdAAVwCBXAJFMAlUIAgecL/a6qFWI3FRHmHWd3Gkhn9zc3NSEtLQ1FREdrb2xEUFIQNGzbgzTffFLR+U5jqCeZm9L/99tu4ceMGCgsLceXKFcyfPx9KpVL0ezQzJUEf9Pv4+EChUECpVOLMmTOG5WVlZVixYgWef/55PPfcc9i0aRPkcjkqKytFrYspCcY8Kuh/6aWXcOLECbS0tKC3txfHjx9HZ2cnvym2kJgL+k+cOAGlUglXV1fY2dlBJpNBpVKJPnGQKQnmgv5NmzahtbUVxcXFcHNzw/fff4+EhARcvHgRoaGhotXF1HA0UNBfV1eHvXv3Ijc3F7Nnz0ZYWBjS09Mxbdo07Nu3T9S6mJJginHQr9PpAOChaxtJpVLDdS/EgmkJxkH/hAkTEBAQgGXLluHSpUuoq6tDZmYmioqKEB8fL24hxAo6OjrItWvXSEdHhzXNqGDRokUkLi7uoefz8/OJg4MDaWxsJDU1NWT+/PnE3d2dyGQyMnnyZHL48OEB1yvEPuFB/xDhQf8IgUugAC6BArgEChiUBCuO5SMeIfaFVRL0J7v0X2w4/+6Lx834sQSrzh1JpVLI5XJotVoAfXdslUgkg974kwwhBDqdDlqtFnK5fEi3AbPqe4J+483Nzbhz586gNzqSkMvl8PT0HNJ/Rqsl6Onp6UFXV9egNzwSsLe3F+RGeIOWwBEO/hGVArgECuASKIBLoAAugQK4BArgEijgf8WZukHE4nTCAAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Legend of round info only\n", "\n", "round_legend = plt.figure(figsize = (1,1)).add_subplot(111)\n", "round_legend.axis('off')\n", "handles = []\n", "for item in round_color_dict.keys():\n", " h = round_legend.bar(0,0, color = round_color_dict[item],\n", " label = item, linewidth =0)\n", " handles.append(h)\n", "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Round'),\n", " # bbox_to_anchor=(10,10), \n", " # bbox_transform=plt.gcf().transFigure)\n", "\n", "filename = \"Round_legend.png\"\n", "filename = os.path.join(metadata_images_dir, filename)\n", "plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "markdown", "id": "34b81c62-58a4-432c-876a-dfa5df0bbe9e", "metadata": {}, "source": [ "### I.7.3. SAMPLES COLORS" ] }, { "cell_type": "code", "execution_count": 59, "id": "3ccb74da-fb1d-4fda-9116-bf6b6a6d868b", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUoAAABlCAYAAAArpKpSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAACuUlEQVR4nO3ZMWsTcRzG8V9SaVohCXQMydTB2ffhu3BxEFzduvhWuvqGhA49yAvogSmCPQdRJ/NcW8P17Ocz/4eHf+DLXW7SdV1XAPzVdOgBAE+dUAIEQgkQCCVAIJQAgVACBEIJELzoc+ju7q62223N5/OaTCaH3gRwcF3XVdu2tVqtajrd/8zYK5Tb7bY2m80/GQfwlDRNU+v1eu+ZXqGcz+dVVfXl7ceaH588ftkzcfn689ATRuny5P3QE0bn3fWboSeMzu62rQ+fXv3u2z69QvnrdXt+fFKLmVD2dXp6NPSEUTo6fTn0hNE5PVkMPWG0+vyd6GMOQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQPCiz6Gu66qqqv12e9Ax/5vd7vvQE0bpe/d16Amjs7u9GXrC6Oxu26r607d9Jl2PU1dXV3V+fv74ZQBPTNM0tV6v957p9UR5dnZWVVXX19e1XC4fv+yZuLm5qc1mU03T1GKxGHrOKLizh3Fv99d1XbVtW6vVKp7tFcrp9Odfmcvl0o/wAIvFwr3dkzt7GPd2P30f/HzMAQ
iEEiDoFcrZbFYXFxc1m80Ovee/4t7uz509jHs7rF5fvQGeM6/eAIFQAgRCCRAIJUAglACBUAIEQgkQCCVA8ANJyGd00cXSZwAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# we want colors that are neither sequential nor categorical. \n", "# Categorical would be ideal if we could generate an arbitrary number of colors, but I do not think that we can. \n", "# Hense, we will choose `n` colors from a continuous palette. First we will generate the right number of colors. Later, we will assign TMA samples to gray.\n", "\n", "# Get those unique colors\n", "color_values = sb.color_palette(\"husl\",n_colors = len(ls_samples))#'HLS'\n", "# each color value is a tuple of three values: (R, G, B)\n", "\n", "# Display those unique colors\n", "sb.palplot(sb.color_palette(color_values))" ] }, { "cell_type": "code", "execution_count": 60, "id": "441a57fe-d55a-49e1-8593-0f1939472b89", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGEAAABlCAYAAABdl421AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAABwUlEQVR4nO3dP2oCQRxH8e9KYGzWBUtv6Qk8gie0ccELWNk4KcIGUqiDRnyJ7wPTuTDM2z92v67WWqOXmr16AzICghEAjABgBAAjABgB4KPlR+fzOYfDIX3fp+u6Z+/p36i15ng8ZrVaZTa7cr/XBuM41iSuO9c4jlfPt+lJ6Ps+SbJer1NKablESU6nU7bb7ff5XdIUYXoFlVIyn88f392bufUK98MMYAQAIwAYAcAIAEYAMAKAEQCMAGAEACMAGAHACABGADACgBEAjABgBAAjABgBwAgARgAwAoARAIwAYAQAIwAYAcAIAEYAMAKAEQCMAGAEACMAGAHACABGADACgBEAjABgBAAjABgBwAgARgAwAoARAIwAYAQAIwAYAcAIAEYAMAKAEQCMAGAEACMAGAHACABGADACgBEAjABgBAAjABgBwAgARgAwAoARAIwA0DRxsNaa5GuModpN5zWd30UtA1B3u93Lh4j+5fUrA1CXy2WSZL/fZxiGlkuU/BgFfE1ThGmW8DAMWSwWj+/ujbTctH6YAYwA0BShlJLNZuNA7Cfp6s3/T3o2X0cARgAwAoARAIwAYAQAIwAYAeATtddd94S3hxMAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]\n", "TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = \"gray\")\n", "sb.palplot(sb.color_palette(TMA_color_values))" ] }, { "cell_type": "code", "execution_count": 61, "id": "18dbf741-983e-4652-97d1-d55e87eba4fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'DD3S1.csv': (0.9677975592919913, 0.44127456009157356, 0.5358103155058701),\n", " 'DD3S2.csv': (0.5920891529639701, 0.6418467016378244, 0.1935069134991043),\n", " 'DD3S3.csv': (0.21044753832183283, 0.6773105080456748, 0.6433941168468681),\n", " 'TMA.csv': (0.5019607843137255, 0.5019607843137255, 0.5019607843137255)}" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Store in a dictionary\n", "color_dict = dict()\n", "color_dict = dict(zip(df.Sample_ID.unique(), color_values))\n", "\n", "# Replace all TMA samples' colors with gray\n", "i = 0\n", "for key in color_dict.keys():\n", " if 'TMA' in key:\n", " color_dict[key] = TMA_color_values[i]\n", " i +=1\n", "\n", "color_dict" ] }, { "cell_type": "code", "execution_count": 62, "id": "807f2bb5-6d19-4086-81c1-98836e850dcd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rgbhexSample_ID
DD3S1.csv(0.9677975592919913, 0.44127456009157356, 0.53...#f77189DD3S1.csv
DD3S2.csv(0.5920891529639701, 0.6418467016378244, 0.193...#97a431DD3S2.csv
DD3S3.csv(0.21044753832183283, 0.6773105080456748, 0.64...#36ada4DD3S3.csv
TMA.csv(0.5019607843137255, 0.5019607843137255, 0.501...#808080TMA.csv
\n", "
" ], "text/plain": [ " rgb hex \\\n", "DD3S1.csv (0.9677975592919913, 0.44127456009157356, 0.53... #f77189 \n", "DD3S2.csv (0.5920891529639701, 0.6418467016378244, 0.193... #97a431 \n", "DD3S3.csv (0.21044753832183283, 0.6773105080456748, 0.64... #36ada4 \n", "TMA.csv (0.5019607843137255, 0.5019607843137255, 0.501... #808080 \n", "\n", " Sample_ID \n", "DD3S1.csv DD3S1.csv \n", "DD3S2.csv DD3S2.csv \n", "DD3S3.csv DD3S3.csv \n", "TMA.csv TMA.csv " ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "color_df_sample = color_dict_to_df(color_dict, \"Sample_ID\")\n", "\n", "# Save to file in metadatadirectory\n", "filename = \"sample_color_data.csv\"\n", "filename = os.path.join(metadata_dir, filename)\n", "color_df_sample.to_csv(filename, index = False)\n", "\n", "color_df_sample" ] }, { "cell_type": "code", "execution_count": 63, "id": "39b6afdb-2e37-471e-86a0-e55ab7667e39", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJgAAACHCAYAAADqQpBvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAATgklEQVR4nO2dfVAV1/nHP7xI4OaigEggRF5EATVCfENtOloFAkEMHV6iFBVf0oo2FaNTEmyLg6nSokZDWkxqECQSFVRAI2LMFaI4mDG+kUTHFMQ0CqhBTI0gr/v7I2V/XgW8CosXOJ+ZO8PuefacZ3e/nN27e77nGkiSJCEQKIThk05A0LsRAhMoihCYQFGEwASKIgQmUBQhMIGiCIEJFEUITKAoQmACRRECEyiKEJhAUYTABIoiBCZQFCEwgaIYP+oGzc3NNDY2KpGLQI8wMTHB0LDz/Y/OApMkiaqqKm7dutXpRgX6j6GhIc7OzpiYmHSqHgNdBxxWVlZy69YtbGxsUKlUGBgYdKphgf7S0tJCRUUF/fr1w8HBoVPnWqcerLm5WRbXwIEDH7sxQc9h0KBBVFRU0NTURL9+/R67Hp0usq33XCqV6rEbEvQsWi+Nzc3Nnarnke7ixGWx79BV51o8phAoihCYQFGEwBTGwMCAnJycJ53GE6NXCOz69essWrQIBwcHnnrqKWxtbfHz86O4uPhJp9bneeQn+fpISEgIjY2NbNu2jSFDhnDt2jU0Gg03b9580qkJJB2oq6uTzp8/L9XV1ekS3q3U1NRIgFRYWNhuzIYNG6Tnn39eUqlU0nPPPSctXrxYun37tlyempoqDRgwQNq/f7/k6uoqmZmZSSEhIdJPP/0kpaWlSY6OjpKFhYX0+uuvS01NTfJ2jo6O0urVq6Xw8HDp6aefluzs7KSkpCSttgEpOztbXr5y5Yr06quvShYWFpKVlZX0yiuvSOXl5V12PLqKrjrnPf4SqVarUavV5OTkUF9f32aMoaEhSUlJfP3112zbt
o0jR44QExOjFVNbW0tSUhI7d+4kPz+fwsJCgoODycvLIy8vj48++oh//etf7N69W2u7devW4eHhwenTp4mNjeWNN97g8OHDbeZRW1vL1KlTUavVHD16lKKiItRqNf7+/jQ0NHTNAdE3ulPNSrF7927J0tJSMjU1lX7xi19IsbGx0rlz59qNz8zMlAYOHCgvp6amSoBUWloqr1u0aJGkUqm0ejo/Pz9p0aJF8rKjo6Pk7++vVffMmTOll19+WV7mnh4sJSVFcnNzk1paWuTy+vp6yczMTDp06NCj77iCiB7sHkJCQqioqGDfvn34+flRWFjImDFjSEtLA6CgoABfX1/s7e0xNzdn7ty5VFdXc+fOHbkOlUqFi4uLvPzMM8/g5OSEWq3WWnf9+nWttidNmvTA8oULF9rM89SpU5SWlmJubi73vFZWVty9e5eysrLOHga9pFfc5AOYmpri6+uLr68vcXFxvPbaa6xatYqpU6cSEBBAVFQUb7/9NlZWVhQVFbFw4UKtYUf3v28zMDBoc11LS8tDc2nvKXhLSwtjx44lIyPjgbJBgwbpsps9jl4jsPsZMWIEOTk5fPnllzQ1NbFhwwZ5fFNmZmaXtXPixIkHlt3d3duMHTNmDLt27cLGxob+/ft3WQ76TI+/RFZXVzNt2jS2b99OSUkJ5eXlZGVlkZiYSFBQEC4uLjQ1NfHee+9x6dIlPvroI95///0ua//48eMkJiby7bff8s9//pOsrCyio6PbjI2IiMDa2pqgoCCOHTtGeXk5n3/+OdHR0Vy5cqXLctInenwPplarmTBhAhs3bqSsrIzGxkYGDx7Mb3/7W1auXImZmRnvvPMOf//734mNjWXy5MkkJCQwd+7cLml/xYoVnDp1ivj4eMzNzdmwYQN+fn5txqpUKo4ePcqbb75JcHAwt2/fxt7eHm9v717bo+k04PDu3buUl5fj7OyMqalpd+TVI3BycmLZsmUsW7bsSafS5XTVOe/xl0iBfiMEJlCUHn8P9iS5fPnyk05B7xE9mEBRhMAEiiIEJlAUITCBogiBCRRFCEygKJ1+THF3eWJX5KETpu/EPDzoPubNm8e2bdsAMDY2xsrKCg8PD8LDw5k3b578AtzJyYnvvvvu53ZMTXnmmWfw8vIiKiqKadOmyfVVV1cTERFBSUkJ1dXV2NjYEBQUxNq1a7Ve93zwwQckJydTWlpKv379cHZ2ZtasWbz55psAfPPNN8TFxXHq1Cm+++47Nm7c2CvfCPSJHszf35/KykouX77MwYMHmTp1KtHR0QQGBtLU1CTHrV69msrKSi5evEh6ejoWFhb4+PiwZs0aOcbQ0JCgoCD27dvHt99+S1paGp999hlRUVFyTEpKCsuXL2fp0qWcO3eO48ePExMTw08//STH1NbWMmTIEP72t79ha2vbPQfiCdAnHrS2Oo0A7O3tGTNmDBMnTsTb25u0tDRee+01AMzNzeU4BwcHJk+ejJ2dHXFxcYSGhuLm5oalpSWLFy+W63Z0dGTJkiWsW7dOXrd//35effVVFi5cKK8bOXKkVk7jx49n/PjxALz11ls678vx48dZuXIlJ0+e5KmnnsLLy4udO3diaWnJ7t27iY+Pp7S0FJVKxejRo8nNzaWoqIigoCCqqqqwsLCQ62r9B/j88891bv9R6RM9WFtMmzYNT09P9u7d22FcdHQ0kiSRm5vbZnlFRQV79+5lypQp8jpbW1tOnDghX3K7irNnz+Lt7c3IkSMpLi6mqKiIGTNm0NzcTGVlJeHh4SxYsIALFy7IngJJkvDx8cHCwoI9e/bIdTU3N5OZmUlERESX5ng/fVZgAO7u7g993WNlZYWNjc0DceHh4ahUKuzt7enfvz8ffvihXLZq1SosLCxwcnLCzc2NefPmkZmZqdNo2I5ITExk3LhxJCcn4+npyciRI3n99dextramsrKSpqYmgoODcXJyYtSoUSxZsgS1Wo2RkREzZ87k448/luvSaDTU1
NQQFhbWqZweRp8WmCRJOk3y0Vbcxo0bOX36NDk5OZSVlbF8+XK5zM7OjuLiYr766iuWLl1KY2MjkZGR+Pv7d0pkrT1YW3h6euLt7c2oUaMICwtjy5Yt1NTUyOUREREUFhZSUVEBQEZGBgEBAVhaWj52PrrQpwV24cIFnJ2dO4yprq7mxo0bD8TZ2tri7u5OUFAQH3zwAZs3b6ayslIr5vnnn+f3v/89GRkZHD58mMOHD3fqfsfMzKzdMiMjIw4fPszBgwcZMWIE7733Hm5ubpSXlwPg5eWFi4sLO3fupK6ujuzsbGbPnv3YuehKnxXYkSNH+OqrrwgJCekw7t1338XQ0JBf//rX7ca0jtlsz5cJP3sEAC0n06Pi4eGBRqNpt9zAwIAXX3yR+Ph4zpw5g4mJCdnZ2XL5b37zGzIyMti/fz+GhoZMnz79sXPRlT7xLbK+vp6qqiqam5u5du0a+fn5JCQkEBgYqDV0+vbt21RVVdHY2Eh5eTnbt2/nww8/JCEhgaFDhwKQl5fHtWvXGD9+PGq1mvPnzxMTE8OLL76Ik5MTAIsXL+bZZ59l2rRpPPfcc1RWVvLXv/6VQYMGyTa3hoYGzp8/L/999epVzp49i1qtltv6xz/+QXZ2tiyq2NhY+d4qKioKExMTCgoKCAsLo6ysDI1Gw0svvYSNjQ1ffPEFN27cYPjw4fL+RUREEB8fz5o1awgNDe2W0cl9QmD5+fnY2dlhbGyMpaUlnp6eJCUlERkZqTWTclxcHHFxcZiYmGBra8vEiRPRaDRMnTpVjjEzM2PLli288cYb1NfXM3jwYIKDg7UeNfj4+LB161Y2b95MdXU11tbWTJo0CY1GI09BWlFRwejRo+Vt1q9fz/r165kyZQqFhYUA/PDDD1p+SVdXVz799FNWrlyJl5cXZmZmTJgwgfDwcPr378/Ro0fZtGkT//3vf3F0dGTDhg28/PLL8vbDhg1j/PjxnDx5kk2bNnX1YW4TMSZf0CZiTL6gRyAEJlAUITCBogiBCRRFCEygKEJgAkURAhMoihCYQFGEwASKIgQmUJROv4t8P2NMV+ShE1ERpx95G301fWzZsoX09HS+/vprAMaOHcvatWvx8vJ6vIOjp/SJHkwfTR+FhYWEh4dTUFBAcXExDg4OvPTSS1y9erV7Dko30SdGU+ij6eP+iYC3bNnC7t270Wg0Hc6+KEwfPQR9M33U1tbS2NiIlZVVuzHC9NHD0CfTx1tvvYW9vT0+Pj7txgjTRw9DX0wfiYmJ7Nixg71793Y49kqYPnoY+mD6WL9+PWvXruXTTz/Fw8Ojw1yE6aMHoQ+mj3Xr1vH222+Tn5/PuHHjHpqzMH3oKfpo+khMTOQvf/kLH3/8MU5OTlRVVQH//+tx0DtMH32iB2s1fTg5OeHv709BQQFJSUnk5uZiZGQkx8XFxWFnZ8fQoUOZM2cOP/74IxqNRn44Cv9v+vjlL3/J8OHDWbZsGYGBgXzyySdyjI+PDydOnCAsLAxXV1dCQkIwNTXVMn0kJyfT0NBAaGgodnZ28mf9+vVyPe2ZPs6dO4eXlxeTJk0iNzcXY2Nj2fQREBCAq6srf/7zn9s1fZSUlCj+7bEVYfoQtIkwfQh6BEJgAkURAhMoihCYQFGEwASKIgQmUBQhMIGiCIEJFEUITKAoQmACRen0y+4JezMeHtRFfBH86O/P9NX0sXfvXtauXUtpaSmNjY0MGzaMFStWMGfOnMc+PvpIn+jB9NH0YWVlxZ/+9CeKi4spKSlh/vz5zJ8/n0OHDnXPQekm+sRwHX00ffzqV7/SWo6Ojmbbtm0UFRXh5+fX7r4I00cPQZ9MH5IkodFouHjxIpMnT243rieaPvpED9Ye7u7ulJSUdBjTkekjNzeXuro6ZsyY8YDpo9V84erqyqRJkwgICCA0NFRr0uEff/wRe3t76uvrMTIyIjk5GV9f33Zzudf00Uprz3j69GnZ9OHo6AjAqFGj5LhW00drrypMH
93AkzZ9mJubc/bsWU6ePMmaNWtYvny5PMN0WwjTRw/jSZs+DA0NGTp0KC+88AIrVqwgNDSUhISEdnMRpo8ehD6YPtqqp6M6hOlDT9FH00dCQgLjxo3DxcWFhoYG8vLySE9PZ/PmzXI+vcH00ScEpo+/9HHnzh2WLFnClStXMDMzw93dne3btzNz5ky5HvFLH4JeizB9CHoEQmACRRECEyiKEJhAUYTABIoiBCZQFCEwgaIIgQkURQhMoChCYAJF6fS7yPj4+K7IQydWrVqlc+zDxnlFRkaSlpYmxxUXFzNx4kS5vL6+nmeffZabN29SUFDwwBDn3/3ud6SkpJCRkcGsWbN034k+Rq/twSorK+XPpk2b6N+/v9a6d999V44dPHgwqampWttnZ2fLU1neT21tLbt27eKPf/wjKSkpiu5HT6fXCszW1lb+DBgwAAMDgwfWtRIZGSkPxGtl69atREZGtll3VlYWI0aMIDY2luPHjz90rn2Affv2MW7cOExNTbG2tiY4OFguS05OZtiwYbJdLjQ0FPjZ+mZvb//A1OevvPJKu7npG71WYI/C2LFjcXZ2lk0R33//PUePHm3Xo5iSksLs2bMZMGAAAQEBD/R+93PgwAGCg4OZPn06Z86cQaPRyLNKf/nllyxdupTVq1dz8eJF8vPzZeNHWFgYP/zwAwUFBXJdNTU1HDp0qNvmWO0sQmD/Y/78+WzduhWA1NRUAgICGDRo0ANx//73vzlx4oQ8bmv27NmkpqZ2+Csea9asYdasWcTHxzN8+HA8PT1ZuXIlAP/5z394+umnCQwMxNHRkdGjR7N06VLgZ8OJv7+/1i90ZGVlYWVl1e7YfH1DCOx/zJ49m+LiYi5dukRaWhoLFixoMy4lJQU/Pz+sra0BCAgI4M6dO3z22Wft1t2RWcPX1xdHR0eGDBnCnDlzyMjIoLa2Vi6PiIhgz5498lDq1i8V986Orc8Igf2PgQMHEhgYyMKFC7l7967WSNBWmpubSU9P58CBAxgbG2NsbIxKpeLmzZsd3ux3ZNYwNzfn9OnT7NixQzb5enp6cuvWLQBmzJhBS0sLBw4c4Pvvv+fYsWPdYtboKoTA7mHBggUUFhYyd+7cNnuIvLw8bt++zZkzZzh79qz8ycrKIicnh+rq6jbrfZhZw9jYGB8fHxITEykpKeHy5cscOXIE+FmcwcHBZGRksGPHDlxdXRk7dmzX7HA30CfG5OuKv78/N27c0JrE5F5SUlKYPn06np6eWutHjhzJsmXL2L59O9HR0cTGxnL16lXS09OBn5/feXt74+LiwqxZs2hqauLgwYPExMTwySefcOnSJSZPnoylpSV5eXm0tLTg5uYm1x8REcGMGTP45ptvelTvBYCkA3V1ddL58+eluro6XcL1jtTUVGnAgAFtlgFSdnZ2m2U1NTUSIBUUFEhVVVWSsbGxlJmZ2WbsH/7wB2nUqFGSJElSZGSkNGXKFK3yPXv2SC+88IJkYmIiWVtbS8HBwZIkSdKxY8ekKVOmSJaWlpKZmZnk4eEh7dq1S2vbpqYmyc7OTgKksrIy3Xe8E3TVORemD0GbCNOHoEcgBCZQFCEwgaIIgQkU5ZEEpsP3AUEvoavOtU4C69evH4DWKwxB76ahoQGg06+kdHrQamRkhIWFBdevXwdApVLpNHGboGfS0tLCjRs3UKlUGBt37lm8zlu3To7bKjJB78bQ0BAHB4dOdyQ6PWi9l+bmZhobGzvVqED/MTEx0Zra6nF5ZIEJBI+CeEwhUBQhMIGiCIEJFEUITKAoQmACRRECEyiKEJhAUf4PvSMxXLJcjmcAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Legend of sample info only\n",
 "g = plt.figure(figsize = (1,1)).add_subplot(111)\n",
 "g.axis('off')\n",
 "handles = []\n",
 "for item in color_dict.keys():\n",
 "    h = g.bar(0,0, color = color_dict[item],\n",
 "              label = item, linewidth =0)\n",
 "    handles.append(h)\n",
 "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Sample')\n",
 "\n",
 "filename = \"Sample_legend.png\"\n",
 "filename = os.path.join(metadata_images_dir, filename)\n",
 "plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "markdown", "id": "e3cf0dea-43db-41fa-952e-bbba53e89cdb", "metadata": {}, "source": [ "### I.7.4. CLUSTERS COLORS" ] }, { "cell_type": "code", "execution_count": 64, "id": "d37c61d7-de64-4b7c-8d01-86ee16ac67c4", "metadata": {}, "outputs": [], "source": [
 "if 'cluster' in df.columns:\n",
 "    # Sorted unique cluster labels, read from the same dataframe the guard above checks,\n",
 "    # so the palette size and the labels zipped with it always come from one source\n",
 "    cluster_labels = sorted(df.cluster.unique())\n",
 "    cluster_color_values = sb.color_palette(\"hls\", n_colors = len(cluster_labels))\n",
 "\n",
 "    print(cluster_labels)\n",
 "    # Display those unique colors\n",
 "    sb.palplot(sb.color_palette(cluster_color_values))\n",
 "\n",
 "    # One palette color per cluster label\n",
 "    cluster_color_dict = dict(zip(cluster_labels, cluster_color_values))\n",
 "    print(cluster_color_dict)\n",
 "\n",
 "    # Create dataframe\n",
 "    cluster_color_df = color_dict_to_df(cluster_color_dict, \"cluster\")\n",
 "    cluster_color_df.head()\n",
 "\n",
 "    # Save to file in metadatadirectory\n",
 "    filename = \"cluster_color_data.csv\"\n",
 "    filename = os.path.join(metadata_dir, filename)\n",
 "    cluster_color_df.to_csv(filename, index = False)" ] }, { "cell_type": "code", "execution_count": 65, "id": "c9215452-fdb6-4963-9f56-31f16e0483bb", "metadata": {}, "outputs": [], "source": [
 "# Legend of cluster info only\n",
 "\n",
 "if 'cluster' in df.columns:\n",
 "    g = plt.figure(figsize = (1,1)).add_subplot(111)\n",
 "    g.axis('off')\n",
 "    handles = []\n",
 "    for item in sorted(cluster_color_dict.keys()):\n",
 "        h = g.bar(0,0, color = cluster_color_dict[item],\n",
 "                  label = item, linewidth =0)\n",
 "        handles.append(h)\n",
 "    # No trailing comma: first_legend is the value returned by plt.legend, not a 1-tuple wrapping it\n",
 "    first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cluster')\n",
 "\n",
 "    filename = \"Clustertype_legend.png\"\n",
 "    filename = os.path.join(metadata_images_dir, filename)\n",
 "    plt.savefig(filename, bbox_inches = 'tight')" ] }, { "cell_type": "code", "execution_count": 66, "id": "715409aa", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RoundTargetExpChanneltarget_lowerfull_columnmarkerlocalisationround_colorchannel_color
0R0AF488300c2af488AF488_Cell_Intensity_AverageAF488cell[0.28685356234627135, 0.13009829239513535, 0.2...[0.00784313725490196, 0.24313725490196078, 1.0]
1R0AF488300c2af488AF488_Cytoplasm_Intensity_AverageAF488cytoplasm[0.28685356234627135, 0.13009829239513535, 0.2...[0.00784313725490196, 0.24313725490196078, 1.0]
2R0AF488300c2af488AF488_Nucleus_Intensity_AverageAF488nucleus[0.28685356234627135, 0.13009829239513535, 0.2...[0.00784313725490196, 0.24313725490196078, 1.0]
3R0AF5551500c3af555AF555_Cell_Intensity_AverageAF555cell[0.28685356234627135, 0.13009829239513535, 0.2...[1.0, 0.48627450980392156, 0.0]
4R0AF5551500c3af555AF555_Cytoplasm_Intensity_AverageAF555cytoplasm[0.28685356234627135, 0.13009829239513535, 0.2...[1.0, 0.48627450980392156, 0.0]
.................................
103R8Sting1000c4stingSting_Cytoplasm_Intensity_AverageStingcytoplasm[0.7684256891219349, 0.8992667116749021, 0.817...[0.10196078431372549, 0.788235294117647, 0.219...
104R8Sting1000c4stingSting_Nucleus_Intensity_AverageStingnucleus[0.7684256891219349, 0.8992667116749021, 0.817...[0.10196078431372549, 0.788235294117647, 0.219...
105R8CD11b1500c5cd11bCD11b_Cell_Intensity_AverageCD11bcell[0.7684256891219349, 0.8992667116749021, 0.817...[0.9098039215686274, 0.0, 0.043137254901960784]
106R8CD11b1500c5cd11bCD11b_Cytoplasm_Intensity_AverageCD11bcytoplasm[0.7684256891219349, 0.8992667116749021, 0.817...[0.9098039215686274, 0.0, 0.043137254901960784]
107R8CD11b1500c5cd11bCD11b_Nucleus_Intensity_AverageCD11bnucleus[0.7684256891219349, 0.8992667116749021, 0.817...[0.9098039215686274, 0.0, 0.043137254901960784]
\n", "

108 rows Ɨ 10 columns

\n", "
" ], "text/plain": [ " Round Target Exp Channel target_lower \\\n", "0 R0 AF488 300 c2 af488 \n", "1 R0 AF488 300 c2 af488 \n", "2 R0 AF488 300 c2 af488 \n", "3 R0 AF555 1500 c3 af555 \n", "4 R0 AF555 1500 c3 af555 \n", ".. ... ... ... ... ... \n", "103 R8 Sting 1000 c4 sting \n", "104 R8 Sting 1000 c4 sting \n", "105 R8 CD11b 1500 c5 cd11b \n", "106 R8 CD11b 1500 c5 cd11b \n", "107 R8 CD11b 1500 c5 cd11b \n", "\n", " full_column marker localisation \\\n", "0 AF488_Cell_Intensity_Average AF488 cell \n", "1 AF488_Cytoplasm_Intensity_Average AF488 cytoplasm \n", "2 AF488_Nucleus_Intensity_Average AF488 nucleus \n", "3 AF555_Cell_Intensity_Average AF555 cell \n", "4 AF555_Cytoplasm_Intensity_Average AF555 cytoplasm \n", ".. ... ... ... \n", "103 Sting_Cytoplasm_Intensity_Average Sting cytoplasm \n", "104 Sting_Nucleus_Intensity_Average Sting nucleus \n", "105 CD11b_Cell_Intensity_Average CD11b cell \n", "106 CD11b_Cytoplasm_Intensity_Average CD11b cytoplasm \n", "107 CD11b_Nucleus_Intensity_Average CD11b nucleus \n", "\n", " round_color \\\n", "0 [0.28685356234627135, 0.13009829239513535, 0.2... \n", "1 [0.28685356234627135, 0.13009829239513535, 0.2... \n", "2 [0.28685356234627135, 0.13009829239513535, 0.2... \n", "3 [0.28685356234627135, 0.13009829239513535, 0.2... \n", "4 [0.28685356234627135, 0.13009829239513535, 0.2... \n", ".. ... \n", "103 [0.7684256891219349, 0.8992667116749021, 0.817... \n", "104 [0.7684256891219349, 0.8992667116749021, 0.817... \n", "105 [0.7684256891219349, 0.8992667116749021, 0.817... \n", "106 [0.7684256891219349, 0.8992667116749021, 0.817... \n", "107 [0.7684256891219349, 0.8992667116749021, 0.817... \n", "\n", " channel_color \n", "0 [0.00784313725490196, 0.24313725490196078, 1.0] \n", "1 [0.00784313725490196, 0.24313725490196078, 1.0] \n", "2 [0.00784313725490196, 0.24313725490196078, 1.0] \n", "3 [1.0, 0.48627450980392156, 0.0] \n", "4 [1.0, 0.48627450980392156, 0.0] \n", ".. ... 
\n", "103 [0.10196078431372549, 0.788235294117647, 0.219... \n", "104 [0.10196078431372549, 0.788235294117647, 0.219... \n", "105 [0.9098039215686274, 0.0, 0.043137254901960784] \n", "106 [0.9098039215686274, 0.0, 0.043137254901960784] \n", "107 [0.9098039215686274, 0.0, 0.043137254901960784] \n", "\n", "[108 rows x 10 columns]" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Attach each row's round color and channel color to the metadata table, for use in visualizations\n",
 "metadata['round_color'] = metadata.apply(lambda row: round_color_dict[row['Round']], axis = 1)\n",
 "metadata['channel_color'] = metadata.apply(lambda row: channel_color_dict[row['Channel']], axis = 1)\n",
 "\n",
 "metadata" ] }, { "cell_type": "code", "execution_count": 78, "id": "9cca88fa", "metadata": {}, "outputs": [], "source": [
 "import hvplot.pandas\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "import panel as pn\n",
 "\n",
 "PRIMARY_COLOR = \"#0072B5\"\n",
 "SECONDARY_COLOR = \"#B54300\"\n",
 "CSV_FILE = (\n",
 "    \"https://raw.githubusercontent.com/holoviz/panel/main/examples/assets/occupancy.csv\"\n",
 ")" ] }, { "cell_type": "code", "execution_count": 79, "id": "3c7402e9", "metadata": {}, "outputs": [], "source": [
 "def transform_data(variable, window, sigma):\n",
 "    \"\"\"Return the rolling mean of metadata[variable] and the subset of it flagged as outliers.\n",
 "\n",
 "    A point is flagged when its absolute distance to the rolling mean exceeds\n",
 "    `sigma` times the rolling standard deviation of that distance. Both rolling\n",
 "    computations use the same `window`.\n",
 "    \"\"\"\n",
 "    series = metadata[variable]\n",
 "    rolling_mean = series.rolling(window=window).mean()\n",
 "    deviation = series - rolling_mean\n",
 "    threshold = deviation.rolling(window=window).std() * sigma\n",
 "    is_outlier = deviation.abs() > threshold\n",
 "    return rolling_mean, rolling_mean[is_outlier]\n",
 "\n",
 "\n",
 "def get_plot(variable=\"Exp\", window=30, sigma=10):\n",
 "    \"\"\"Overlay the rolling-mean curve with a scatter of its outlier points.\"\"\"\n",
 "    rolling_mean, outlier_points = transform_data(variable, window, sigma)\n",
 "    curve = rolling_mean.hvplot(height=300, legend=False, color=PRIMARY_COLOR)\n",
 "    markers = outlier_points.hvplot.scatter(color=SECONDARY_COLOR, padding=0.1, legend=False)\n",
 "    return curve * markers" ] }, { "cell_type": "code", 
"execution_count": 80, "id": "38636bb3", "metadata": {}, "outputs": [], "source": [
 "# Offer only numeric columns: transform_data takes a rolling mean/std of the selected\n",
 "# column, which cannot be computed for the text and color columns of metadata.\n",
 "variable_widget = pn.widgets.Select(\n",
 "    name=\"Target\",\n",
 "    value=\"Exp\",\n",
 "    options=list(metadata.select_dtypes(include=\"number\").columns),\n",
 ")\n",
 "window_widget = pn.widgets.IntSlider(name=\"window\", value=30, start=1, end=60)\n",
 "sigma_widget = pn.widgets.IntSlider(name=\"sigma\", value=10, start=0, end=20)" ] }, { "cell_type": "code", "execution_count": 81, "id": "a4785336", "metadata": {}, "outputs": [ { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ], "text/plain": [ ":Overlay\n", " .Curve.Exp :Curve [index] (Exp)\n", " .Scatter.Exp :Scatter [index] (Exp)" ] }, "execution_count": 81, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "8c06a97e-378a-4028-a51c-8b8fbb1cf019" } }, "output_type": "execute_result" } ], "source": [
 "# Static preview with fixed settings\n",
 "get_plot(variable=\"Exp\", window=20, sigma=10)" ] }, { "cell_type": "code", "execution_count": 82, "id": "39f7b3e6", "metadata": {}, "outputs": [], "source": [
 "# Bind every get_plot argument to its widget\n",
 "bound_plot = pn.bind(get_plot, variable=variable_widget, window=window_widget, sigma=sigma_widget)" ] }, { "cell_type": "code", "execution_count": 83, "id": "c58b4fb2", "metadata": {}, "outputs": [ { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ], "text/plain": [ "Column\n", " [0] Column(sizing_mode='fixed', width=300)\n", " [0] Select(name='Target', options=['Round', 'Target', ...], value='Exp')\n", " [1] IntSlider(end=60, name='window', start=1, value=30)\n", " [2] IntSlider(end=20, name='sigma', value=10)\n", " [1] ParamFunction(function, _pane=HoloViews, defer_load=False)" ] }, "execution_count": 83, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "31cb1aa6-32cb-4971-9559-0aecc17d32d4" } }, "output_type": "execute_result" } ], "source": [
 "# Fixed-width column holding the three controls\n",
 "widgets = pn.Column(\n",
 "    variable_widget,\n",
 "    window_widget,\n",
 "    sigma_widget,\n",
 "    sizing_mode=\"fixed\",\n",
 "    width=300,\n",
 ")\n",
 "# Controls on top, widget-bound plot underneath\n",
 "pn.Column(widgets, bound_plot)" ] }, { "cell_type": "markdown", "id": "f0642911-7a0b-49f6-9598-c8975b188807", "metadata": {}, "source": [ "## I.8. SAVE" ] }, { "cell_type": "code", "execution_count": 68, "id": "67b33926-3ecf-415d-b67e-09d1054eab62", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Do you want to overwrite all existing files without confirmation? 
(yes/no): \n" ] } ], "source": [
 "# NOTE: input() waits for an answer, so this cell has to be run in an interactive session.\n",
 "overwrite_all = input(\"Do you want to overwrite all existing files without confirmation? (yes/no): \")\n",
 "overwrite_all = overwrite_all.lower().strip()\n",
 "\n",
 "for sample in ls_samples:\n",
 "    sample_id = sample.split('.csv')[0]\n",
 "    filename = os.path.join(output_data_dir, sample_id + \"_\" + step_suffix + \".csv\")\n",
 "    file_exists = os.path.exists(filename)\n",
 "\n",
 "    # An existing file is only replaced after a global 'yes' or a per-file 'yes'\n",
 "    if file_exists and overwrite_all != 'yes':\n",
 "        user_response = input(\"File by name \" + filename + \" already exists. Do you want to overwrite it? (yes/no): \")\n",
 "        if user_response.lower().strip() != 'yes':\n",
 "            print(\"File was not overwritten. Moving to the next sample.\")\n",
 "            continue\n",
 "\n",
 "    # Single write path shared by the 'create' and 'overwrite' cases\n",
 "    df_save = df.loc[df['Sample_ID'] == sample, :]\n",
 "    df_save.to_csv(filename, index=True, index_label='ID', mode='w')  # mode='w' replaces any existing file\n",
 "    if file_exists:\n",
 "        print(\"File \" + filename + \" was overwritten!\")\n",
 "    else:\n",
 "        print(\"File \" + filename + \" was created and saved !\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "e41a9fb8", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }