multiTAP

Running

App Files Files Community

multiTAP / app.py

ivangzf

update IMC time reference

f13e642 over 1 year ago

raw

history blame contribute delete

24.7 kB

	# Printed before the package imports below so the user has a time reference
	# while the first launch is still loading.
	print('Importing Gradio app packages... (first launch takes about 3-5 minutes)')

	import gradio as gr
	import yaml
	import skimage
	import numpy as np
	import matplotlib.pyplot as plt
	from matplotlib.pyplot import cm
	import plotly.express as px
	import plotly.graph_objs as go
	from plotly.subplots import make_subplots
	import os
	import seaborn as sns

	from cytof import classes
	from classes import CytofImage, CytofCohort, CytofImageTiff
	from cytof.hyperion_preprocess import cytof_read_data_roi
	from cytof.utils import show_color_table

	# Output directory: feature_extraction() writes the exported feature CSV here
	# and passes it to CytofCohort as dir_out.
	OUTDIR = './output'

	def cytof_tiff_eval(file_path, marker_path, cytof_state):
	    """Read the uploaded ROI file, resolve its markers and preview the channels.

	    Args:
	        file_path: uploaded ROI file, passed straight to ``cytof_read_data_roi``.
	        marker_path: optional marker file; ``None`` means the markers are
	            taken from the ROI file itself.
	        cytof_state: previous state value; it is not read and is replaced by
	            the freshly loaded image.

	    Returns:
	        ``(msg, viz, cytof_img)``: a marker-count message, the result of
	        ``check_channels`` and the loaded image object to store as state.
	    """
	    # set to generic names because uploaded filenames are unpredictable
	    slide = 'slide0'
	    roi = 'roi1'

	    # read in the data
	    cytof_img, _ = cytof_read_data_roi(file_path, slide, roi)

	    if marker_path is None:
	        # case 1: no marker file (TXT/CSV upload) -> get markers from the data
	        cytof_img.get_markers()

	        # preprocess
	        cytof_img.preprocess()
	        cytof_img.get_image()
	    else:
	        # case 2: TIFF upload accompanied by a marker file
	        # NOTE(review): yaml.Loader can construct arbitrary Python objects and
	        # the marker file is user-uploaded. Consider yaml.safe_load if marker
	        # files only ever hold plain mappings -- confirm before changing.
	        # The context manager closes the handle the original left open.
	        with open(marker_path, "rb") as marker_file:
	            labels_markers = yaml.load(marker_file, Loader=yaml.Loader)
	        cytof_img.set_markers(**labels_markers)

	    viz = cytof_img.check_channels(ncols=3, savedir='.')
	    msg = f'Your uploaded TIFF has {len(cytof_img.markers)} markers'

	    return msg, viz, cytof_img


	def channel_select(cytof_img):
	    """Return three multiselect dropdowns listing the image's channels.

	    Per the original note, the three are meant for: unwanted channels,
	    nuclei channels and membrane channels.
	    """
	    return tuple(
	        gr.Dropdown(choices=cytof_img.channels, multiselect=True)
	        for _ in range(3)
	    )

	def nuclei_select(cytof_img):
	    """Return two multiselect dropdowns listing the image's channels.

	    Per the original note, one is for nuclei channels and one for membrane
	    channels.
	    """
	    nuclei_dropdown = gr.Dropdown(choices=cytof_img.channels, multiselect=True)
	    membrane_dropdown = gr.Dropdown(choices=cytof_img.channels, multiselect=True)
	    return nuclei_dropdown, membrane_dropdown

	def modify_channels(cytof_img, unwanted_channels, nuc_channels, mem_channels):
	    """Apply the user's channel choices to a copy of the image.

	    Works on ``cytof_img.copy()`` so the input object is left as it was:
	    1) remove the unwanted channels, 2) define the 'nuclei' channel and
	    remove the channels that call reports back, 3) define the 'membrane'
	    channel.

	    Returns:
	        ``(msg, updated_image)``: a summary of the remaining, nuclei and
	        membrane channels, plus the modified copy.
	    """
	    updated = cytof_img.copy()

	    # step 1: drop what the user flagged as unwanted
	    updated.remove_special_channels(unwanted_channels)

	    # step 2: define nuclei, then remove the channels returned by the call
	    merged_into_nuclei = updated.define_special_channels({'nuclei': nuc_channels})
	    updated.remove_special_channels(merged_into_nuclei)

	    # step 3: define membrane; its return value is not used
	    updated.define_special_channels({'membrane': mem_channels})

	    # the image is only re-derived for plain CytofImage objects; the original
	    # note says CytofImageTIFF carries its own image attribute
	    if type(updated) is CytofImage:
	        updated.get_image()

	    nuclei_txt = ', '.join(merged_into_nuclei)
	    membrane_txt = ', '.join(mem_channels)
	    remaining_txt = ', '.join(updated.channels)
	    msg = f'Your remaining channels are: {remaining_txt}.\n\n Nuclei channels: {nuclei_txt}.\n\n Membrane channels: {membrane_txt}'
	    return msg, updated

	def update_dropdown_options(cytof_img, selected_self, selected_other1, selected_other2):
	    """Remove the options selected in one dropdown from the other two dropdowns.

	    Args:
	        cytof_img: image object whose ``channels`` list is the full option set.
	        selected_self: values selected in the dropdown that changed.
	        selected_other1: current selection of the first other dropdown.
	        selected_other2: current selection of the second other dropdown.

	    Returns:
	        Two multiselect ``gr.Dropdown`` objects for the other two dropdowns.
	        Each offers every channel not selected anywhere plus its own current
	        selection, and keeps that selection as its value.
	    """
	    # tolerate None (no selection) so the list concatenations below cannot fail
	    selected_self = list(selected_self or [])
	    selected_other1 = list(selected_other1 or [])
	    selected_other2 = list(selected_other2 or [])

	    # Filter rather than list.remove(): a value that appears in two selections,
	    # or that is no longer among the channels, would make remove() raise
	    # ValueError.
	    taken = set(selected_self) | set(selected_other1) | set(selected_other2)
	    updated_choices = [ch for ch in cytof_img.channels if ch not in taken]

	    return (
	        gr.Dropdown(choices=updated_choices + selected_other1, value=selected_other1, multiselect=True),
	        gr.Dropdown(choices=updated_choices + selected_other2, value=selected_other2, multiselect=True),
	    )


	def cell_seg(cytof_img, radius):
	    """Segment nuclei and cells, then plot the cell segmentation overlay.

	    Args:
	        cytof_img: image object providing ``get_seg`` and ``visualize_seg``.
	        radius: radius forwarded to ``get_seg``.

	    Returns:
	        ``(fig, cytof_img)``: the plotly figure and the same image object.
	    """
	    # the membrane channel is used only when one has been defined
	    has_membrane = 'membrane' in cytof_img.channels
	    _nuclei_mask, _cell_mask = cytof_img.get_seg(
	        use_membrane=has_membrane, radius=radius, show_process=False
	    )

	    # both overlays are requested; only the cell overlay is plotted below
	    cytof_img.visualize_seg(segtype="nuclei", show=False)
	    cell_overlay = cytof_img.visualize_seg(segtype="cell", show=False)

	    # the legend names the first channel as the marker shown in green
	    first_marker = cytof_img.channels[0]

	    fig = px.imshow(cell_overlay)

	    # empty scatter traces exist only to create legend entries
	    legend_entries = (
	        ('white', 'membrane boundaries'),
	        ('yellow', 'nucleus boundaries'),
	        ('red', 'nucleus'),
	        ('green', first_marker),
	    )
	    for colour, label in legend_entries:
	        fig.add_trace(go.Scatter(x=[None], y=[None], mode='markers', marker=dict(color=colour), name=label))
	    fig.update_layout(legend=dict(orientation="v", bgcolor='lightgray'))

	    return fig, cytof_img

	def feature_extraction(cytof_img, cohort_state, percentile_threshold):
	    """Extract and normalize cell features, export them, and build a cohort.

	    Args:
	        cytof_img: segmented image object to extract features from.
	        cohort_state: previous cohort state; not read, a new cohort is returned
	            in its place.
	        percentile_threshold: quantile passed to
	            ``feature_quantile_normalization``; also part of the exported
	            attribute and file names.

	    Returns:
	        ``(cytof_img, cytof_cohort, df_feature)``: the image, a new
	        ``CytofCohort`` containing it, and the normalized feature table
	        without its ``filename`` column.
	    """
	    # extract and normalize all features
	    cytof_img.extract_features(filename=cytof_img.filename)
	    cytof_img.feature_quantile_normalization(qs=[percentile_threshold])

	    # create the output dir if needed; exist_ok avoids the check-then-create race
	    os.makedirs(OUTDIR, exist_ok=True)

	    feature_attr = f"df_feature_{percentile_threshold}normed"
	    cytof_img.export_feature(feature_attr, os.path.join(OUTDIR, f"feature_{percentile_threshold}normed.csv"))
	    df_feature = getattr(cytof_img, feature_attr)

	    # each file upload in Gradio will always have the same filename
	    # also the temp path created by Gradio is too long to be visually satisfying.
	    df_feature = df_feature.loc[:, df_feature.columns != 'filename']

	    # NOTE(review): the quantile here is fixed at 75 regardless of
	    # percentile_threshold -- confirm this is intentional.
	    cytof_img.calculate_quantiles(qs=[75])

	    # convert to cohort and prepare downstream analysis
	    dict_cytof_img = {f"{cytof_img.slide}_{cytof_img.roi}": cytof_img}
	    cytof_cohort = CytofCohort(cytof_images=dict_cytof_img, dir_out=OUTDIR)
	    cytof_cohort.batch_process_feature()
	    cytof_cohort.generate_summary()

	    return cytof_img, cytof_cohort, df_feature

	def co_expression(cytof_img, percentile_threshold):
	    """Plot a clustered heatmap of log10(observed / expected) marker co-expression.

	    Args:
	        cytof_img: image object providing ``roi_co_expression``.
	        percentile_threshold: selects the ``"<threshold>normed"`` feature set.

	    Returns:
	        ``(fig, cytof_img)``: the matplotlib figure behind the clustermap and
	        the same image object.
	    """
	    eps = 1e-6  # avoid divide by 0 or log(0)

	    observed, expected = cytof_img.roi_co_expression(
	        feature_name=f"{percentile_threshold}normed",
	        accumul_type='sum',
	        return_components=False,
	    )

	    # log10 of the observed/expected ratio, with eps guarding both steps
	    ratio = observed.values / (expected.values + eps)
	    log_ratio = np.log10(ratio + eps)

	    # A ratio of 0 lands exactly on log10(eps). No observations means the
	    # co-expression is undetermined, not strongly negative, so reset to 0.
	    floor_value = np.log10(eps)
	    log_ratio[log_ratio == floor_value] = 0

	    # strip the feature suffix so tick labels show bare marker names
	    tick_labels = [col.replace('_cell_sum', '') for col in expected.columns]

	    grid = sns.clustermap(
	        log_ratio,
	        center=np.log10(1 + eps), cmap='RdBu_r', vmin=-1, vmax=3,
	        xticklabels=tick_labels, yticklabels=tick_labels,
	    )

	    # the Figure object is retrieved through the heatmap axes
	    return grid.ax_heatmap.get_figure(), cytof_img

	def spatial_interaction(cytof_img, percentile_threshold, method, cluster_threshold):
	    """Plot a clustered heatmap of log10(observed / expected) marker interaction.

	    Args:
	        cytof_img: image object providing ``roi_interaction_graphs``.
	        percentile_threshold: selects the ``"<threshold>normed"`` feature set.
	        method: forwarded to ``roi_interaction_graphs`` as ``method``.
	        cluster_threshold: forwarded to ``roi_interaction_graphs`` as ``threshold``.

	    Returns:
	        ``(fig, cytof_img)``: the matplotlib figure behind the clustermap and
	        the same image object.
	    """
	    eps = 1e-6

	    # the first returned table is used as expected, the second as observed
	    expected, observed = cytof_img.roi_interaction_graphs(
	        feature_name=f"{percentile_threshold}normed",
	        accumul_type='sum',
	        method=method,
	        threshold=cluster_threshold,
	    )

	    # log10 of the observed/expected ratio, with eps guarding both steps
	    ratio = observed.values / (expected.values + eps)
	    log_ratio = np.log10(ratio + eps)

	    # A ratio of 0 lands exactly on log10(eps). No observations means the
	    # interaction is undetermined, not strongly negative, so reset to 0.
	    floor_value = np.log10(eps)
	    log_ratio[log_ratio == floor_value] = 0

	    # strip the feature suffix so tick labels show bare marker names
	    tick_labels = [col.replace('_cell_sum', '') for col in expected.columns]

	    grid = sns.clustermap(
	        log_ratio,
	        center=np.log10(1 + eps), cmap='bwr', vmin=-2, vmax=2,
	        xticklabels=tick_labels, yticklabels=tick_labels,
	    )

	    # the Figure object is retrieved through the heatmap axes
	    return grid.ax_heatmap.get_figure(), cytof_img

	def get_marker_pos_options(cytof_img):
	    """Return two dropdowns listing every channel except 'nuclei' and 'membrane'.

	    Raises:
	        ValueError: if 'nuclei' is not among the channels. The original note
	            says it is guaranteed to exist once channels have been defined.
	    """
	    marker_options = cytof_img.channels.copy()

	    # mandatory special channel: removal fails loudly when it is absent
	    marker_options.remove('nuclei')

	    # optional special channel: removed only when present
	    if 'membrane' in marker_options:
	        marker_options.remove('membrane')

	    first_dropdown = gr.Dropdown(choices=marker_options, interactive=True)
	    second_dropdown = gr.Dropdown(choices=marker_options, interactive=True)
	    return first_dropdown, second_dropdown

	def viz_pos_marker_pair(cytof_img, marker1, marker2, percentile_threshold):
	    """Show the marker-positive cell images of two markers side by side.

	    Args:
	        cytof_img: image object providing ``visualize_marker_positive``.
	        marker1: marker shown in the left subplot.
	        marker2: marker shown in the right subplot.
	        percentile_threshold: forwarded as ``normq``.

	    Returns:
	        A plotly figure with two subplots whose axes are matched.
	    """

	    def _positive_cell_image(marker):
	        # same settings for both markers; only the cell-level image is kept
	        _nuclei_img, cell_img, _colors = cytof_img.visualize_marker_positive(
	            marker=marker,
	            feature_type="normed",
	            accumul_type="sum",
	            normq=percentile_threshold,
	            show_boundary=True,
	            color_list=[(0,0,1), (0,1,0)], # negative, positive
	            color_bound=(0,0,0),
	            show_colortable=False)
	        return cell_img

	    left_img = _positive_cell_image(marker1)
	    right_img = _positive_cell_image(marker2)

	    # one row, two columns; px.imshow is used only to build the image trace
	    titles = (f"positive {marker1} cells", f"positive {marker2} cells")
	    fig = make_subplots(rows=1, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=titles)
	    for col_idx, img in enumerate((left_img, right_img), start=1):
	        fig.add_trace(px.imshow(img).data[0], row=1, col=col_idx)

	    # match both subplots' axes to the first pair
	    fig.update_xaxes(matches='x')
	    fig.update_yaxes(matches='y')
	    fig.update_layout(title_text=" ")

	    return fig

	def phenograph(cytof_cohort):
	    """Run PhenoGraph clustering on the cohort and return its scatter figure.

	    Returns:
	        ``(umap, cytof_cohort)``: the ``'cohort'`` entry of the scatter
	        figures returned by ``vis_phenograph`` and the same cohort object.
	    """
	    pheno_key = cytof_cohort.clustering_phenograph()

	    # vis_phenograph returns six values; only the last two are inspected here
	    vis_outputs = cytof_cohort.vis_phenograph(
	        key_pheno=pheno_key,
	        level="cohort",
	        save_vis=False,
	        show_plots=False,
	        plot_together=False)
	    _feats, _communities, _protein_exps, _figs, scatter_figs, expression_figs = vis_outputs

	    cohort_scatter = scatter_figs['cohort']
	    # looked up as in the original, but not returned
	    _cohort_expression = expression_figs['cohort']['cell_sum']

	    return cohort_scatter, cytof_cohort

	def cluster_interaction_fn(cytof_img, cytof_cohort):
	    """Plot the interaction heatmap between PhenoGraph clusters for this image's slide.

	    Args:
	        cytof_img: image object; its ``slide`` attribute selects the
	            interaction matrix to plot.
	        cytof_cohort: cohort that already holds a PhenoGraph result.

	    Returns:
	        ``(fig, cytof_img, cytof_cohort)``: the matplotlib figure of the
	        clustermap built here, plus both inputs passed back as state.
	    """
	    # avoid calling the clustering algorithm again; the first stored
	    # PhenoGraph key is reused
	    key_pheno = list(cytof_cohort.phenograph.keys())[0]

	    epsilon = 1e-6
	    # the second return value is not used; the heatmap shown is built below
	    interacts, _ = cytof_cohort.cluster_interaction_analysis(key_pheno)
	    interact = interacts[cytof_img.slide]

	    cluster_ids = np.arange(interact.shape[0])
	    clustergrid_interaction = sns.clustermap(interact, center=np.log10(1+epsilon),
	                                             cmap='RdBu_r', vmin=-1, vmax=1,
	                                             xticklabels=cluster_ids,
	                                             yticklabels=cluster_ids)

	    # Take the figure from the clustermap built above. The original read it
	    # from the grid returned by cluster_interaction_analysis, so the colour
	    # limits and tick labels set here were discarded.
	    fig = clustergrid_interaction.ax_heatmap.get_figure()

	    return fig, cytof_img, cytof_cohort

	def get_cluster_pos_options(cytof_img):
	    """Return one dropdown listing every channel except 'nuclei' and 'membrane'.

	    Raises:
	        ValueError: if 'nuclei' is not among the channels. The original note
	            says it is guaranteed to exist once channels have been defined.
	    """
	    marker_options = cytof_img.channels.copy()

	    # mandatory special channel: removal fails loudly when it is absent
	    marker_options.remove('nuclei')

	    # optional special channel: removed only when present
	    if 'membrane' in marker_options:
	        marker_options.remove('membrane')

	    return gr.Dropdown(choices=marker_options, interactive=True)

	def viz_cluster_positive(marker, percentile_threshold, cytof_img, cytof_cohort):
	    """Show marker-positive cells next to the PhenoGraph cluster assignment.

	    Args:
	        marker: marker whose positive cells are shown in the left subplot.
	        percentile_threshold: forwarded as ``normq``.
	        cytof_img: image object providing the two visualizations.
	        cytof_cohort: cohort that already holds a PhenoGraph result.

	    Returns:
	        ``(fig, cytof_img, cytof_cohort)``: a two-subplot plotly figure, plus
	        both objects passed back as state.
	    """
	    # reuse the first stored PhenoGraph key; clustering is not run again
	    pheno_key = next(iter(list(cytof_cohort.phenograph.keys())[:1]), None) if False else list(cytof_cohort.phenograph.keys())[0]

	    # left panel: cell-level image of marker-positive cells
	    _nuclei_img, positive_cells, _colors = cytof_img.visualize_marker_positive(
	        marker=marker,
	        feature_type="normed",
	        accumul_type="sum",
	        normq=percentile_threshold,
	        show_boundary=True,
	        color_list=[(0,0,1), (0,1,0)], # negative, positive
	        color_bound=(0,0,0),
	        show_colortable=False)

	    # attach PhenoGraph results to individual ROIs before visualizing them
	    cytof_cohort.attach_individual_roi_pheno(pheno_key, override=True)

	    # right panel: cell-level image from visualize_pheno
	    _pheno_nuclei, pheno_cells, _pheno_colors = cytof_img.visualize_pheno(key_pheno=pheno_key)

	    titles = (f"positive {marker} cells", "PhenoGraph clusters on cells")
	    fig = make_subplots(rows=1, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=titles)
	    for col_idx, img in enumerate((positive_cells, pheno_cells), start=1):
	        fig.add_trace(px.imshow(img).data[0], row=1, col=col_idx)

	    # match both subplots' axes to the first pair
	    fig.update_xaxes(matches='x')
	    fig.update_yaxes(matches='y')
	    fig.update_layout(title_text=" ")

	    return fig, cytof_img, cytof_cohort

	# Gradio App template
	# Declares the UI components step by step (upload, channel definition,
	# segmentation, feature extraction, downstream analysis, marker visualization,
	# PhenoGraph clustering) and wires each button/dropdown event to the handler
	# functions defined above. Shared objects travel between handlers through
	# gr.State components.
	with gr.Blocks() as demo:
	cytof_state = gr.State(CytofImage())

	# used in scenarios where users define/remove channels multiple times:
	# the upload handler writes here, and modify_channels reads from here
	cytof_original_state = gr.State(CytofImage())

	gr.Markdown("# Step 1. Upload images")
	gr.Markdown('You may upload one or two files depending on your use case.')
	gr.Markdown('Case 1: A single TXT or CSV file that contains information about antibodies, rare heavy metal isotopes, and image channel names. Make sure files are following the CyTOF, IMC, or multiplex data convention. Leave the `Marker File` upload section blank.')
	gr.Markdown('Case 2: Multiple file uploads required. First, a TIFF file containing Regions of Interest (ROIs) stored as multiplexed images. Then, upload a `Marker File` listing the channels to identify the antibodies.')

	with gr.Row(): # first row where 1) asks for TIFF upload and 2) displays marker info
	img_path = gr.File(file_types=[".tiff", '.tif', '.txt', '.csv'], label='(Required) A file containing Regions of Interest (ROIs) of multiplexed imaging slides.')
	img_info = gr.Textbox(label='Marker information', info='Ensure the number of markers displayed below matches the expected number.')

	with gr.Row(equal_height=True): # second row where 1) asks for marker file upload and 2) displays the visualization of individual channels
	with gr.Column():
	marker_path = gr.File(file_types=['.txt'], label='(Optional) Marker File. A list used to identify the antibodies in each TIFF layer. Upload one TXT file.')
	with gr.Row():
	clear_btn = gr.Button("Clear")
	submit_btn = gr.Button("Upload")
	img_viz = gr.Plot(label="Visualization of individual channels")

	gr.Markdown("# Step 2. Modify existing channels")
	gr.Markdown("After visualizing the individual channels, did you notice any that should not be included in the next steps? Remove those if so.")
	gr.Markdown("Define channels designed to visualize nuclei. Optionally, define channels designed to visualize membranes.")

	with gr.Row(equal_height=True): # third row selects nuclei channels
	with gr.Column():
	selected_unwanted_channel = gr.Dropdown(label='(Optional) Select the unwanted channel', interactive=True)
	selected_nuclei = gr.Dropdown(label='(Required) Select the nuclei channel', interactive=True)
	selected_membrane = gr.Dropdown(label='(Optional) Select the membrane channel', interactive=True)

	define_btn = gr.Button('Modify channels')

	channel_feedback = gr.Textbox(label='Channels info update')

	# upload the file, and gather channel info. Then populate to the unwanted_channel, nuclei, and membrane components
	submit_btn.click(
	fn=cytof_tiff_eval, inputs=[img_path, marker_path, cytof_original_state], outputs=[img_info, img_viz, cytof_original_state],
	api_name='upload'
	).success(
	fn=channel_select, inputs=cytof_original_state, outputs=[selected_unwanted_channel, selected_nuclei, selected_membrane]
	)

	# each dropdown's change event refreshes the other two dropdowns via update_dropdown_options
	selected_unwanted_channel.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_unwanted_channel, selected_nuclei, selected_membrane], outputs=[selected_nuclei, selected_membrane], api_name='dropdown_monitor1') # api_name used to identify in the endpoints
	selected_nuclei.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_nuclei, selected_membrane, selected_unwanted_channel], outputs=[selected_membrane, selected_unwanted_channel], api_name='dropdown_monitor2')
	selected_membrane.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_membrane, selected_nuclei, selected_unwanted_channel], outputs=[selected_nuclei, selected_unwanted_channel], api_name='dropdown_monitor3')

	# modifies the channels per user input; reads the original state and writes the working state
	define_btn.click(fn=modify_channels, inputs=[cytof_original_state, selected_unwanted_channel, selected_nuclei, selected_membrane], outputs=[channel_feedback, cytof_state])

	gr.Markdown('# Step 3. Perform cell segmentation based on the defined nuclei and membrane channels')

	with gr.Row(): # This row defines cell radius and performs segmentation
	with gr.Column():
	cell_radius = gr.Number(value=5, precision=0, label='Cell size', info='Please enter the desired radius for cell segmentation (in pixels; default value: 5)')
	seg_btn = gr.Button("Segment")
	seg_viz = gr.Plot(label="Visualization of the segmentation. Hover over graph to zoom, pan, save, etc.")
	seg_btn.click(fn=cell_seg, inputs=[cytof_state, cell_radius], outputs=[seg_viz, cytof_state])

	gr.Markdown('# Step 4. Extract cell features')

	cohort_state = gr.State(CytofCohort())
	with gr.Row(): # feature extraction related functions
	with gr.Column():
	gr.CheckboxGroup(choices=['Yes', 'Yes', 'Yes'], label='Note: This step will take significantly longer than the previous ones. A 130MB IMC file takes about 14 minutes to compute. Did you read this note?')
	norm_percentile = gr.Slider(minimum=50, maximum=99, step=1, value=75, interactive=True, label='Normalized quantification percentile')
	extract_btn = gr.Button('Extract')
	feat_df = gr.DataFrame(headers=['id','coordinate_x','coordinate_y','area_nuclei'], label='Feature extraction summary')

	extract_btn.click(fn=feature_extraction, inputs=[cytof_state, cohort_state, norm_percentile],
	outputs=[cytof_state, cohort_state, feat_df])

	gr.Markdown('# Step 5. Downstream analysis')

	with gr.Row(): # show co-expression and spatial analysis
	with gr.Column():
	co_exp_viz = gr.Plot(label="Visualization of cell coexpression of markers")
	co_exp_btn = gr.Button('Run co-expression analysis')

	with gr.Column():
	spatial_viz = gr.Plot(label="Visualization of cell spatial interaction of markers")
	cluster_method = gr.Radio(label='Select the clustering method', value='k-neighbor', choices=['k-neighbor', 'distance'], info='K-neighbor: classifies the threshold number of surrounding cells as neighborhood pairs. Distance: classifies cells within threshold distance as neighborhood pairs.')
	cluster_threshold = gr.Slider(minimum=1, maximum=100, step=1, value=30, interactive=True, label='Clustering threshold')

	spatial_btn = gr.Button('Run spatial interaction analysis')

	co_exp_btn.click(fn=co_expression, inputs=[cytof_state, norm_percentile], outputs=[co_exp_viz, cytof_state])
	# spatial_btn.click is wired in Step 6 because, on success, it also populates
	# the marker-positive dropdowns that are only created there

	gr.Markdown('# Step 6. Visualize positive markers')
	gr.Markdown('Select two markers for side-by-side comparison to visualize their positive states in cells. This serves two purposes. 1) Validate the co-expression analysis results. High expression level should mean a similar number of positive markers within the two slides, whereas low expression level mean a large difference of in the number of positive markers. 2) Validate the spatial interaction analysis results. High interaction means the two positive markers are in close proximity of each other (proximity is previously defined in `clustering threshold`), and vice versa.')

	with gr.Row(): # two marker positive visualization - dropdown options
	selected_marker1 = gr.Dropdown(label='Select one marker', info='Select a marker to visualize', interactive=True)
	selected_marker2 = gr.Dropdown(label='Select another marker', info='Selecting the same marker as the previous one is allowed', interactive=True)
	pos_viz_btn = gr.Button('Visualize these two markers')


	with gr.Row(): # two marker positive visualization - visualization
	marker_pos_viz = gr.Plot(label="Visualization of the two markers. Hover over graph to zoom, pan, save, etc.")

	# run the spatial analysis, then fill both marker dropdowns once it succeeds
	spatial_btn.click(
	fn=spatial_interaction, inputs=[cytof_state, norm_percentile, cluster_method, cluster_threshold], outputs=[spatial_viz, cytof_state]
	).success(
	fn=get_marker_pos_options, inputs=[cytof_state], outputs=[selected_marker1, selected_marker2]
	)
	pos_viz_btn.click(fn=viz_pos_marker_pair, inputs=[cytof_state, selected_marker1, selected_marker2, norm_percentile], outputs=[marker_pos_viz])


	gr.Markdown('# Step 7. Phenogrpah Clustering')
	gr.Markdown('Cells can be clustered into sub-groups based on the extracted single-cell data. Time reference: a 300MB IMC file takes about 2 minutes to compute.')

	with gr.Row(): # add two plots to visualize phenograph results
	phenograph_umap = gr.Plot(label="UMAP results")
	cluster_interaction = gr.Plot(label="Spatial interaction of clusters")


	with gr.Row(equal_height=False): # action components
	umap_btn = gr.Button('Run Phenograph clustering')
	cluster_interact_btn = gr.Button('Run clustering interaction')
	cluster_interact_btn.click(cluster_interaction_fn, inputs=[cytof_state, cohort_state], outputs=[cluster_interaction, cytof_state, cohort_state])

	with gr.Row():
	with gr.Column():
	selected_cluster_marker = gr.Dropdown(label='Select one marker', info='Select a marker to visualize', interactive=True)
	cluster_positive_btn = gr.Button('Compare clusters and positive markers')

	with gr.Column():
	cluster_v_positive = gr.Plot(label="Cluster assignment vs. positive cells. Hover over graph to zoom, pan, save, etc.")


	# run PhenoGraph, then fill the cluster-marker dropdown once it succeeds
	umap_btn.click(
	fn=phenograph, inputs=[cohort_state], outputs=[phenograph_umap, cohort_state]
	).success(
	fn=get_cluster_pos_options, inputs=[cytof_state], outputs=[selected_cluster_marker], api_name='selectClusterMarker'
	)
	cluster_positive_btn.click(fn=viz_cluster_positive, inputs=[selected_cluster_marker, norm_percentile, cytof_state, cohort_state], outputs=[cluster_v_positive, cytof_state, cohort_state])


	# reset the listed components to None when Clear is clicked
	# (the gr.State values and the dropdowns are not in this list)
	clear_components = [img_path, marker_path, img_info, img_viz, channel_feedback, seg_viz, feat_df, co_exp_viz, spatial_viz, marker_pos_viz, phenograph_umap, cluster_interaction, cluster_v_positive]
	clear_btn.click(lambda: [None]*len(clear_components), outputs=clear_components)


	# start the Gradio server only when this file is executed as a script
	if __name__ == "__main__":
	demo.launch()