#!/usr/bin/env python
# coding: utf-8
# In[1]:
import os
import random
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import subprocess
import warnings
from scipy import signal
import plotly.figure_factory as ff
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, plot
import plotly.express as px
from my_modules import *
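# NOTE (assumption, inferred from usage below): my_modules.py is expected to
# provide at least compare_headers(), verify_line_no(), rgb_tuple_from_str(),
# color_dict_to_df() and divide_exp_time().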
os.getcwd()
import panel as pn
# In[2]:
pn.extension()
#Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category= FutureWarning)
warnings.filterwarnings('ignore', category= UserWarning)
# ## II.2. *DIRECTORIES
# In[5]:
# Set base directory
##### MAC WORKSTATION #####
#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
###########################
##### WINDOWS WORKSTATION #####
#base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
###############################
##### LOCAL WORKSTATION #####
#input_path = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'
#############################
#set_name = 'Set_A'
#set_name = 'test'
#present_dir = os.path.dirname(os.path.realpath(__file__))
#input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
#base_dir = input_path
'''
# Function to change permissions recursively with error handling
def change_permissions_recursive(path, mode):
    for root, dirs, files in os.walk(path):
        for dir in dirs:
            try:
                os.chmod(os.path.join(root, dir), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for directory {os.path.join(root, dir)}: {e}")
        for file in files:
            try:
                os.chmod(os.path.join(root, file), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for file {os.path.join(root, file)}: {e}")

change_permissions_recursive(base_dir, 0o777)
change_permissions_recursive('/code', 0o777)
'''
base_dir = '/code/wetransfer_data-zip_2024-05-17_1431'
set_path = 'test'
selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
ls_samples = ['Ashlar_Exposure_Time.csv', 'new_data.csv', 'DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']  # placeholder; redefined in section II.3.8
set_name = set_path
# In[7]:
project_name = set_name # Project name
step_suffix = 'bs' # Current part (here part II)
previous_step_suffix_long = "_qc_eda" # Previous part (here QC/EDA NOTEBOOK)
# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)
# BS output directories
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# BS images subdirectory
output_images_dir = os.path.join(output_data_dir,"images")
# Data and Metadata directories
# Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
# images subdirectory
metadata_images_dir = os.path.join(metadata_dir,"images")
# Create directories if they don't already exist
for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
    if not os.path.exists(d):
        print("Creation of the", d, "directory...")
        os.makedirs(d)
    else:
        print("The", d, "directory already exists!")

os.chdir(input_data_dir)
# In[8]:
# Verify paths
print('base_dir :', base_dir)
print('input_data_dir :', input_data_dir)
print('output_data_dir :', output_data_dir)
print('output_images_dir :', output_images_dir)
print('metadata_dir :', metadata_dir)
print('metadata_images_dir :', metadata_images_dir)
# ## II.3. FILES
# Don't forget to put your data in the projname_data directory!
# ### II.3.1. METADATA
# In[9]:
# Import all metadata we need from the QC/EDA chapter
# METADATA
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
metadata = pd.read_csv(filename)
# Verify size with verify_line_no() function in my_modules.py
#verify_line_no(filename, metadata.shape[0] + 1)
# Verify headers
exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
metadata = metadata.dropna()
metadata.head()
# ### II.3.2. NOT_INTENSITIES
# In[10]:
# NOT_INTENSITIES
filename = "not_intensities.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
#not_intensities = []
with open(filename, 'r') as fh:
    not_intensities = fh.read().strip().split("\n")
    # take str, strip whitespace, split on new line character
# NOTE: the list read from the file is immediately overridden by this hard-coded list
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']
# Verify size
print("Verifying data read from file is the correct length...\n")
verify_line_no(filename, len(not_intensities))
# Print to console
print("not_intensities =\n", not_intensities)
import os
import pandas as pd
# Function to compare headers (assuming you have this function defined in your my_modules.py)
def compare_headers(expected, actual, description):
    missing = [col for col in expected if col not in actual]
    if missing:
        print(f"WARNING: Missing expected columns in {description}: {missing}")
    else:
        print(f"All expected columns are present in {description}.")
# Get the current script directory
present_dir = os.path.dirname(os.path.realpath(__file__))
# Define the input path
input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
base_dir = input_path
set_path = 'test'
# Project and step names
project_name = set_path # Project name
previous_step_suffix_long = "_qc_eda" # Previous part (here QC/EDA NOTEBOOK)
# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)
# Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
metadata_images_dir = os.path.join(metadata_dir, "images")
# Define writable directory
writable_directory = '/tmp'
# Check and read metadata file
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)
# Check if the file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
metadata = pd.read_csv(filename)
# Verify headers
exp_cols = ['Round', 'Target', 'Channel', 'target_lower', 'full_column', 'marker', 'localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
metadata = metadata.dropna()
print(metadata.head())
# Example of writing to the writable directory
output_file_path = os.path.join(writable_directory, 'processed_metadata.csv')
try:
    metadata.to_csv(output_file_path, index=False)
    print(f"Processed metadata written successfully to {output_file_path}")
except PermissionError as e:
    print(f"Permission denied: Unable to write the file at {output_file_path}. Error: {e}")
except Exception as e:
    print(f"An error occurred: {e}")
# ### II.3.3. FULL_TO_SHORT_COLUMN_NAMES
# In[11]:
# FULL_TO_SHORT_COLUMN_NAMES
filename = "full_to_short_column_names.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
df = pd.read_csv(filename, header = 0)
# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)
# Turn into dictionary
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]
# Print information
print('full_to_short_names =\n',full_to_short_names)
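# Illustration of the set_index(...).T.to_dict('records')[0] idiom on toy data
# (hypothetical names, not from the real CSV):
_demo = pd.DataFrame({'full_name': ['Marker_Cell_Intensity_Average'],
                      'short_name': ['Marker_Cell']})
_expected = {'Marker_Cell_Intensity_Average': 'Marker_Cell'}
assert _demo.set_index('full_name').T.to_dict('records')[0] == _expected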
# ### II.3.4. SHORT_TO_FULL_COLUMN_NAMES
# In[12]:
# SHORT_TO_FULL_COLUMN_NAMES
filename = "short_to_full_column_names.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
df = pd.read_csv(filename, header = 0)
# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)
# Turn into dictionary
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
# Print information
print('short_to_full_names =\n',short_to_full_names)
# ### II.3.5. SAMPLES COLORS
# In[13]:
# COLORS INFORMATION
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
df = pd.read_csv(filename, header = 0)
df = df.drop(columns = ['hex'])
# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
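# rgb_tuple_from_str() is imported from my_modules.py. A minimal sketch
# consistent with its usage here (an assumption -- the real helper may differ):
def _rgb_tuple_from_str_sketch(rgb_str):
    # "(0.1, 0.2, 0.3)" -> (0.1, 0.2, 0.3)
    return tuple(float(x) for x in rgb_str.strip('()').split(','))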
# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)
# Turn into dictionary
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()
# Print information
print('sample_color_dict =\n',sample_color_dict)
sample_color_dict = pd.DataFrame.from_dict(sample_color_dict, orient='index', columns=['R', 'G', 'B'])
# In[14]:
sample_color_dict
# ### II.3.6. CHANNELS COLORS
# In[15]:
# CHANNELS
filename = "channel_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
df = pd.read_csv(filename, header = 0)
df = df.drop(columns = ['hex'])
# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)
# Turn into dictionary
channel_color_dict = df.set_index('Channel')['rgb'].to_dict()
# Print information
print('channel_color_dict =\n',channel_color_dict)
channel_color_dict = pd.DataFrame.from_dict(channel_color_dict, orient='index', columns=['R', 'G', 'B'])
# In[16]:
channel_color_dict
# ### II.3.7. ROUNDS COLORS
# In[17]:
# ROUND
filename = "round_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")
# Open, read in information
df = pd.read_csv(filename, header = 0)
df = df.drop(columns = ['hex'])
# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)
# Turn into dictionary
round_color_dict = df.set_index('Round')['rgb'].to_dict()
# Print information
print('round_color_dict =\n',round_color_dict)
round_color_dict = pd.DataFrame.from_dict(round_color_dict, orient='index', columns=['R', 'G', 'B'])
# In[18]:
round_color_dict
# ### II.3.8. DATA
# In[19]:
# DATA
# List files in the directory
# Check if the directory exists
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_qc_eda.csv")]
    print("The following CSV files were detected:")
    print(ls_samples)
else:
    print(f"The directory {input_data_dir} does not exist.")
# In[20]:
# Import all the others files
dfs = {}
# Set variable to hold default header values
# First gather information on expected headers using first file in ls_samples
# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)
expected_headers = df.columns.values
print(expected_headers)
###############################
# !! This may take a while !! #
###############################
for sample in list(ls_samples):  # iterate over a copy so removal below is safe
    file_path = os.path.join(input_data_dir, sample)
    try:
        # Read the CSV file
        df = pd.read_csv(file_path, index_col=0)
        # Only keep non-empty DataFrames
        if not df.empty:
            # Reorder the columns to match the expected headers list
            df = df.reindex(columns=expected_headers)
            print(sample, "file is processed!\n")
            #print(df)
            # Add df to dfs
            dfs[sample] = df
    except pd.errors.EmptyDataError:
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)
#print(dfs)
# In[21]:
# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False , sort = False)
#del dfs
df.head()
# In[22]:
df.shape
# In[23]:
# Check for NaN entries (should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isnull().any().any()
# ## II.4. *FILTERING
# In[24]:
print("Number of cells before filtering :", df.shape[0])
cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"
# In[25]:
#print(df)
# In[26]:
# Delete small cells and objects w/high AF555 Signal (RBCs)
# We usually use the 95th percentile calculated during QC_EDA
df = df.loc[(df['Nucleus_Size'] > 42 )]
df = df.loc[(df['Nucleus_Size'] < 216)]
print("Number of cells after filtering on nucleus size:", df.shape[0])
df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]
print("Number of cells after filtering on AF555A ___ intensity:", df.shape[0])
cells_after_filter_nucleus = f"Number of cells after filtering on nucleus size: {df.shape[0]}"
cells_after_filter_intensity = f"Number of cells after filtering on AF555A ___ intensity: {df.shape[0]}"
# In[27]:
# Assign cell type
# For development purposes, cell types are assigned at random here
# Generate random values for the cell_type column
random_values = np.random.randint(0, 10, size=len(df))
# Assign cell type at random (note: the argument n is currently unused)
def assign_cell_type(n):
    return np.random.choice(['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL'])
df['cell_type'] = np.vectorize(assign_cell_type)(random_values)
df['cell_subtype'] = df['cell_type'].copy()
# In[28]:
filtered_dataframe = df
df.head()
# In[29]:
quality_control_df = filtered_dataframe
# In[30]:
def check_index_format(index_str, ls_samples):
    """
    Checks if the given index string follows the specified format.

    Args:
    index_str (str): The index string to be checked.
    ls_samples (list): A list of valid sample names.

    Returns:
    bool: True if the index string follows the format, False otherwise.
    """
    # Split the index string into parts
    parts = index_str.split('_')
    # Check if there are exactly 3 parts
    if len(parts) != 3:
        print(len(parts))
        return False
    # Check if the first part is in ls_samples
    sample_name = parts[0]
    if f'{sample_name}_qc_eda.csv' not in ls_samples:
        print(sample_name)
        return False
    # Check if the second part is in ['Cell', 'Cytoplasm', 'Nucleus']
    location = parts[1]
    valid_locations = ['Cell', 'Cytoplasm', 'Nucleus']
    if location not in valid_locations:
        print(location)
        return False
    # Check if the third part is a number
    try:
        index = int(parts[2])
    except ValueError:
        print(parts[2])
        return False
    # If all checks pass, return True
    return True
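# Usage illustration (hypothetical sample name):
# check_index_format('DD3S1_Cell_42', ['DD3S1_qc_eda.csv'])     -> True
# check_index_format('DD3S1_membrane_42', ['DD3S1_qc_eda.csv']) -> False ('membrane' is not a valid location)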
# In[31]:
# Let's take a look at a few features to make sure our dataframe is as expected
df.index

def check_format_ofindex(index):
    for idx in index:
        check_index = check_index_format(idx, ls_samples)
        if check_index is False:
            index_format = "Bad"
            return index_format
    index_format = "Good"
    return index_format

print(check_format_ofindex(df.index))
# In[32]:
import panel as pn
import pandas as pd
def quality_check(file, not_intensities):
    # Load the output file
    df = file
    # Check Index
    check_index = check_format_ofindex(df.index)
    # Check Shape
    check_shape = df.shape
    # Check for NaN entries
    check_no_null = df.isnull().any().any()
    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        print("df.shape after removing 0 mean values: ", df.shape)
        check_zero_intensities = f'Shape after removing 0 mean values: {df.shape}'
    else:
        print("No zero intensity values.")
        check_zero_intensities = "No zero intensity values."
    # Create a quality check results table
    quality_check_results_table = pd.DataFrame({
        'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
        'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
    })
    # Create a quality check results component
    quality_check_results_component = pn.Card(
        pn.pane.DataFrame(quality_check_results_table),
        title="Quality Control Results",
        header_background="#2196f3",
        header_color="white",
    )
    return quality_check_results_component
# ## II.5. CELL TYPES COLORS
# Establish colors to use throughout workflow
# we want colors that are categorical, since Cell Type is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_types = ['STROMA','CANCER','IMMUNE','ENDOTHELIAL']
color_values = sb.color_palette("hls", n_colors = len(cell_types))
# each color value is a tuple of three values: (R, G, B)
print("Unique cell types are:",df.cell_type.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))
# In[33]:
# Define your custom colors for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}
# Retrieve the list of cell types
cell_types = list(custom_colors.keys())
# Extract the corresponding colors from the dictionary
color_values = [custom_colors[cell] for cell in cell_types]
# Display the colors
sb.palplot(sb.color_palette(color_values))
# In[34]:
# Store in a dictionary
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict
# In[35]:
celltype_color_df = pd.DataFrame.from_dict(celltype_color_dict, orient='index', columns=['R', 'G', 'B'])
# In[36]:
# Save color information (mapping and legend) to metadata directory
# Create dataframe
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
celltype_color_df.head()
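# color_dict_to_df() is imported from my_modules.py. Based on how its output is
# consumed elsewhere in this pipeline (a CSV with the key column plus 'rgb' and
# 'hex' columns), a minimal sketch might look like this (an assumption -- the
# real helper may differ):
def _color_dict_to_df_sketch(color_dict, key_name):
    return pd.DataFrame({
        key_name: list(color_dict.keys()),
        'rgb': [str(rgb) for rgb in color_dict.values()],   # "(r, g, b)" strings
        'hex': [mplc.to_hex(rgb) for rgb in color_dict.values()],
    })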
# Save to file in the metadata directory
filename = "celltype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
celltype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")
# In[37]:
celltype_color_df.head()
# In[38]:
# Legend of cell type info only
g = plt.figure(figsize = (1,1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    h = g.bar(0, 0, color=celltype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')
filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
# In[39]:
metadata
# In[40]:
df.columns.values
# In[41]:
df.shape
# In[42]:
metadata.shape
# ## II.6. *CELL SUBTYPES COLORS
# In[43]:
# Establish colors to use throughout workflow
# we want colors that are categorical, since Cell Type is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_subtypes = ['DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',
                 'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',
                 'STROMA_OTHER', 'ENDOTHELIAL']
color_values = sb.color_palette("Paired", n_colors=len(cell_subtypes))
# each color value is a tuple of three values: (R, G, B)
print("Unique cell subtypes are:", df.cell_subtype.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))
# In[44]:
# Store in a dictionary
cellsubtype_color_dict = dict(zip(cell_subtypes, color_values))
cellsubtype_color_dict
# In[45]:
cellsubtype_color_df = pd.DataFrame.from_dict(cellsubtype_color_dict, orient='index', columns=['R', 'G', 'B'])
# In[46]:
# Save color information (mapping and legend) to metadata directory
# Create dataframe
cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, "cell_subtype")
# Save to file in the metadata directory
filename = "cellsubtype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
cellsubtype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")
# In[47]:
cellsubtype_color_df.head()
# In[48]:
# Legend of cell subtype info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in cellsubtype_color_dict.keys():
    h = g.bar(0, 0, color=cellsubtype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell subtype')
filename = "Cellsubtype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
# ## II.7. IMMUNE CHECKPOINT COLORS
# In[49]:
# Assign IMMUNE SUBTYPES
df['cell_subtype'] = df['cell_type'].copy()
df['immune_checkpoint'] = 'none'
df
immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']
color_values = sb.color_palette("husl",n_colors=len(immune_checkpoint))
# each color value is a tuple of three values: (R, G, B)
print("Unique immune checkpoint are:",df.immune_checkpoint.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))
# In[50]:
immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']
# Base colors for the primary checkpoints
base_colors = sb.color_palette("husl", n_colors=3) # Three distinct colors
# Function to mix two RGB colors
def mix_colors(color1, color2):
    return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))
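# e.g. mix_colors((1.0, 0.0, 0.0), (0.0, 0.0, 1.0)) -> (0.5, 0.0, 0.5)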
# Generate mixed colors for the combinations of checkpoints
mixed_colors = [
    mix_colors(base_colors[0], base_colors[1]),   # Mix B7H4 and PDL1
    # mix_colors(base_colors[0], base_colors[2]), # Mix B7H4 and PD1
    # mix_colors(base_colors[1], base_colors[2]), # Mix PDL1 and PD1
    tuple(np.mean(base_colors, axis=0))           # Mix B7H4, PDL1, and PD1
]
# Adding the color for 'None'
#none_color = [(0.8, 0.8, 0.8)] # A shade of gray
# Combine all colors into one list
color_values = base_colors + mixed_colors #+ none_color
# Display unique immune checkpoint combinations
print("Unique immune checkpoint combinations are:", immune_checkpoint)
# Display the unique colors
sb.palplot(color_values)
# In[51]:
# Store in a dictionary
immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))
immunecheckpoint_color_dict
# In[52]:
# Save color information (mapping and legend) to metadata directory
# Create dataframe
immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, "immune_checkpoint")
immunecheckpoint_color_df.head()
# Save to file in the metadata directory
filename = "immunecheckpoint_color_data.csv"
filename = os.path.join(metadata_dir, filename)
immunecheckpoint_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")
# In[53]:
# Legend of immune checkpoint info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in immunecheckpoint_color_dict.keys():
    h = g.bar(0, 0, color=immunecheckpoint_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Immune checkpoint')
filename = "Immunecheckpoint_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
# ## II.8. BACKGROUND SUBTRACTION
# In[54]:
def do_background_sub(col, df, metadata):
    #print(col.name)
    location = metadata.loc[metadata['full_column'] == col.name, 'localisation'].values[0]
    #print('location = ' + location)
    channel = metadata.loc[metadata['full_column'] == col.name, 'Channel'].values[0]
    #print('channel = ' + channel)
    # Find the autofluorescence (AF) column acquired in the same channel and
    # cell compartment, then subtract it from the marker intensity column
    af_target = metadata.loc[
        (metadata['Channel'] == channel)
        & (metadata['localisation'] == location)
        & (metadata['target_lower'].str.contains(r'^af\d{3}$')),
        'full_column'].values[0]
    return col - df.loc[:, af_target]
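# Worked example of the AF-matching logic on toy data (hypothetical column
# names, for illustration only):
_meta_sketch = pd.DataFrame({
    'full_column':  ['CK7_Cell_Intensity_Average', 'AF555_Cell_Intensity_Average'],
    'Channel':      ['555', '555'],
    'localisation': ['Cell', 'Cell'],
    'target_lower': ['ck7', 'af555'],
})
_df_sketch = pd.DataFrame({
    'CK7_Cell_Intensity_Average':   [100.0, 80.0],
    'AF555_Cell_Intensity_Average': [10.0, 5.0],
})
# do_background_sub(_df_sketch['CK7_Cell_Intensity_Average'], _df_sketch, _meta_sketch)
# -> 0    90.0
#    1    75.0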
# In[55]:
metadata_with_localisation = metadata
metadata_with_localisation
# In[56]:
# Normalization: divide each intensity column by its exposure time
df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: divide_exp_time(column, 'Exp', metadata), axis=0)
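# divide_exp_time() is imported from my_modules.py. A minimal sketch consistent
# with how it is called here (an assumption -- the real helper may differ) would
# divide each intensity column by the exposure time recorded in the metadata:
def _divide_exp_time_sketch(col, exp_col, metadata):
    exp_time = metadata.loc[metadata['full_column'] == col.name, exp_col].values[0]
    return col / exp_time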
# In[57]:
normalization_df = df
normalization_df.head()
# In[58]:
# Do background subtraction
# Note: the lambda closes over df and metadata from the enclosing scope;
# be careful if refactoring this into a standalone script.
df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis=0)
# In[59]:
df
background_substraction_df = df
background_substraction_df.head()
# In[60]:
# Drop AF columns
df = df.filter(regex=r'^(?!AF\d{3}).*')
print(df.columns.values)
# In[61]:
intensities_df = df.loc[:, ~df.columns.isin(not_intensities)]
intensities_df
# In[62]:
normalization_df.head()
# In[63]:
metadata_df = metadata_with_localisation
# intensities_df was built above from the background-subtracted data
# Create a list of column names from the intensities DataFrame
column_names = intensities_df.columns.tolist()
# Create a Select widget for choosing a column
column_selector = pn.widgets.Select(name='Select Column', options=column_names)
# Create a Markdown widget to display the selected column's information
column_info_md = pn.pane.Markdown(name='Column Information', width=400, object='Select a column to view its information.')
# Define a function to update the column information
def update_column_info(event):
    selected_column = event.new
    if selected_column:
        # Get the selected column's intensity
        intensity = intensities_df[selected_column].values
        # Get the corresponding channel, localization, and exposure from the metadata
        channel = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Channel'].values[0]
        localization = metadata_df.loc[metadata_df['full_column'] == selected_column, 'localisation'].values[0]
        exposure = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Exp'].values[0]
        # Create a Markdown string with the column information
        column_info_text = f"**Intensity:** {intensity}\n\n**Channel:** {channel}\n\n**Localization:** {localization}\n\n**Exposure:** {exposure}"
        # Update the Markdown widget with the column information
        column_info_md.object = column_info_text
    else:
        column_info_md.object = 'Select a column to view its information.'
# Watch for changes in the column selector and update the column information
column_selector.param.watch(update_column_info, 'value')
# Create a Panel app and display the widgets
bs_info = pn.Column(column_selector, column_info_md)
bs_info
normalization_df.head()
# In[65]:
import panel as pn
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
app2 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Background Subtraction",
    main=[pn.Tabs(
        ("Background-Subtraction", pn.Column(
            #pn.Column(pn.pane.Markdown("### Celltype thresholds"), pn.pane.DataFrame(celltype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cell Subtype thresholds"), pn.pane.DataFrame(cellsubtype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cells Before Filtering"), pn.pane.Str(cells_before_filter)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Nucleus"), pn.pane.Str(cells_after_filter_nucleus)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Intensity"), pn.pane.Str(cells_after_filter_intensity)),
            #pn.Column(pn.pane.Markdown("### Dataframe after filtering"), pn.pane.DataFrame(filtered_dataframe.head())),
            pn.Column(pn.pane.Markdown("### The metadata obtained that specifies the localisation:"), metadata_with_localisation.head(8)),
            pn.Column(pn.pane.Markdown("### The channels and exposure of each intensities column"), bs_info),
            pn.Column(pn.pane.Markdown("### Dataframe after performing normalization"), pn.pane.DataFrame(normalization_df.head(), width=1500)),
            pn.Column(pn.pane.Markdown("### Dataframe after background subtraction"), pn.pane.DataFrame(background_substraction_df.head())),
        )),
        ("Quality Control", pn.Column(
            quality_check(quality_control_df, not_intensities)
            #pn.pane.Markdown("### The Quality check results are:"), quality_check_results(check_shape, check_no_null, check_all_expected_files_present, check_zero_intensities)
        ))
    )],
)
app2.servable()