KashyapiNagaHarshitha committed on
Commit 21d801f · verified · 1 Parent(s): f031f40

Update Quality_Control.py

Files changed (1)
  1. Quality_Control.py +296 -185
Quality_Control.py CHANGED
@@ -12,12 +12,6 @@ import hvplot.pandas
  import pandas as pd
  import numpy as np
  import json
- import panel as pn
- import pandas as pd
- import os
- import pandas as pd
- import random
- import asyncio
  import matplotlib.pyplot as plt
  from bokeh.plotting import figure
  from bokeh.io import push_notebook, show
@@ -29,56 +23,47 @@ from bokeh.models import Span, Label
  from bokeh.models import ColumnDataSource, Button
  from my_modules import *
  from datasets import load_dataset
-
  #Silence FutureWarnings & UserWarnings
  warnings.filterwarnings('ignore', category= FutureWarning)
  warnings.filterwarnings('ignore', category= UserWarning)

- #input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
- present_dir = os.path.dirname(os.path.realpath(__file__))
- # Construct the full path to the stored_variables.json file
- json_path = os.path.join(present_dir, 'stored_variables.json')
- with open(json_path, 'r') as file:
-     stored_vars = json.load(file)
- directory = stored_vars['base_dir']
- input_path = os.path.join(present_dir, directory)
- set_path = stored_vars['set_path']
- selected_metadata_files = stored_vars['selected_metadata_files']
- ls_samples = stored_vars['ls_samples']
- base_dir = input_path

- #input_path = '/Users/harshithakolipaka/Desktop/CycIF/wetransfer_data-zip_2024-05-17_1431'
- #set_path = 'test'
- #selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
- #ls_samples = ['Ashlar_Exposure_Time.csv', 'new_data.csv', 'DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']

  pn.extension()

  update_button = pn.widgets.Button(name='CSV Files', button_type='primary')
  def update_samples(event):
-     with open(json_path, 'r') as file:
          stored_vars = json.load(file)
-         print(stored_vars)
-     ls_samples = stored_vars['ls_samples']
-     return f'CSV Files Selected: {ls_samples}'
  update_button.on_click(update_samples)

  csv_files_button = pn.widgets.Button(icon="clipboard", button_type="primary")
  indicator = pn.indicators.LoadingSpinner(value=False, size=25)

  def handle_click(clicks):
-     with open(json_path, 'r') as file:
          stored_vars = json.load(file)
-         print(stored_vars)
-     #ls_samples = stored_vars['ls_samples']
-     #return f'CSV Files Selected: {ls_samples}'

- # pn.Row(csv_files_button, pn.bind(
- #     , csv_files_button.param.clicks),)


  # ## I.2. *DIRECTORIES

- #set_path = 'test'

  # Set base directory

@@ -132,7 +117,7 @@ for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata
      print("The", d, "directory already exists !")

  os.chdir(input_data_dir)
- with open(json_path, 'r') as file:
      stored_vars = json.load(file)
      # ls_samples = stored_vars['ls_samples']
      selected_metadata_files = stored_vars['selected_metadata_files']
@@ -180,6 +165,13 @@ print('metadata_images_dir :', metadata_images_dir)
  #ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(".csv")]
  print("The following CSV files were detected:\n\n",[sample for sample in ls_samples], "\n\nin", input_data_dir, "directory.")

  def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
      if len(selected_metadata_files) == []:
          if not file:
@@ -200,44 +192,38 @@ def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
          return combined_metadata_df

      else:
-         '''if selected_metadata_files:
          single_file_path = os.path.join(metadata_dir, selected_metadata_files[0])
          single_file_df = pd.read_csv(single_file_path)
          print(f"Only one file selected: {selected_metadata_files[0]}")
-
-         return single_file_df'''
-
-     if len(selected_metadata_files) == 1:
-         combined_metadata_path = os.path.join(metadata_dir, 'combined_metadata.csv')
-
-         if os.path.exists(combined_metadata_path):
-             print(f"Combined metadata file already exists: {combined_metadata_path}")
-             combined_metadata_df = pd.read_csv(combined_metadata_path)
-         else:
-             if selected_metadata_files:
-                 combined_metadata_df = pd.DataFrame()
-                 for file in selected_metadata_files:
-                     file_path = os.path.join(metadata_dir, file)
-                     metadata_df = pd.read_csv(file_path)
-                     combined_metadata_df = pd.concat([combined_metadata_df, metadata_df], ignore_index=True)
-
-                 combined_metadata_df.to_csv(combined_metadata_path, index=False)
-                 print(f"Combined metadata saved to: {combined_metadata_path}")
-             else:
-                 print("No metadata files selected.")
-                 combined_metadata_df = pd.DataFrame()
-
-     return combined_metadata_df

  print(combine_and_save_metadata_files(metadata_dir, selected_metadata_files))

  ls_samples

  path = os.path.join(input_data_dir, ls_samples[0])
  #df = load_dataset('csv', data_files = path )
  df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]),index_col = 0, nrows = 1)
  df.head(10)

  # First gather information on expected headers using first file in ls_samples
  # Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
  df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)
@@ -248,7 +234,17 @@ print("df :\n", df.head(), "\n")
  print("df's columns :\n", df.columns, "\n")
  print("df's index :\n", df.index, "\n")
  print("df's index name :\n", df.index.name)

  df.head()

  # Verify that the ID column in input file became the index
  # Verify that the index name column is "ID", if not, rename it
  if df.index.name != "ID":
@@ -276,15 +272,40 @@ print("\ndf :\n", df.head(), "\n")
  print("df's columns :\n", df.columns, "\n")
  print("df's index :\n", df.index, "\n")
  print("df's index name :\n", df.index.name)

  print("Used " + ls_samples[0] + " to determine the expected and corrected headers for all files.\n")
  print("These headers are: \n" + ", ".join([h for h in expected_headers]))

  corrected_headers = True

  for sample in ls_samples:
      file_path = os.path.join(input_data_dir,sample)
      print(file_path)

  # Import all the others files
  dfs = {}
  ###############################
@@ -439,16 +460,32 @@ else:
          file_not_intensities.write(item + "\n")
      file_not_intensities.close()

  not_intensities_df = pd.read_csv(path_not_intensities)
  not_intensities_df

  # Columns we want to keep: not_intensities, and any intensity column that contains 'Intensity_Average' (drop any intensity marker column that is not a mean intensity)
  to_keep = not_intensities + [x for x in df.columns.values[~df.columns.isin(not_intensities)] if 'Intensity_Average' in x]

  to_keep

  print(len(to_keep) - 1)

  # However, our to_keep list contains items that might not be in our df headers!
  # These items are from our not_intensities list. So let's ask for only those items from to_keep that are actually found in our df
  # Retains only the columns from the to_keep list that are found in the df's headers (columns).
@@ -458,17 +495,14 @@ df = df[[x for x in to_keep if x in df.columns.values]]

  df.head()

- # Assuming you have a DataFrame named 'df'
- # df = pd.read_csv('your_file.csv')

- # Load or create the stored_variables.json file
- json_file_path = os.path.join(present_dir,"stored_variables.json")

- if os.path.exists(json_file_path):
-     with open(json_file_path, "r") as file:
-         stored_variables = json.load(file)
- else:
-     stored_variables = {}

  # Get all column names
  all_columns = df.columns.tolist()
@@ -495,95 +529,6 @@ intensity_marker = list(set(intensity_marker))
  print("Intensity Markers:")
  print(intensity_marker)

- # Create a DataFrame with the intensity markers and default values
- marker_options_df = pd.DataFrame({
-     'Marker': intensity_marker,
-     'Cell': [True] * len(intensity_marker),
-     'Cytoplasm': [False] * len(intensity_marker),
-     'Nucleus': [False] * len(intensity_marker)
- })
-
- # Define formatters for the Tabulator widget
- tabulator_formatters = {
-     'Cell': {'type': 'tickCross'},
-     'Cytoplasm': {'type': 'tickCross'},
-     'Nucleus': {'type': 'tickCross'}
- }
-
- # Create the Tabulator widget
- tabulator = pn.widgets.Tabulator(marker_options_df, formatters=tabulator_formatters, sizing_mode='stretch_width')
-
- # Create a DataFrame to store the initial intensities
- new_data = [{'Description': f"{marker}_Cell_Intensity_Average"} for marker in intensity_marker if True]
- new_data_df = pd.DataFrame(new_data)
-
- # Create a widget to display the new data as a DataFrame
- new_data_table = pn.widgets.Tabulator(new_data_df, name='New Data Table', sizing_mode='stretch_width')
-
- # Create a button to start the update process
- run_button = pn.widgets.Button(name="Save Selection", button_type='primary')
-
- # Function to update stored_variables.json
- def update_stored_variables(selected_columns):
-     stored_variables["selected_intensities"] = selected_columns
-     with open(json_file_path, "w") as file:
-         json.dump(stored_variables, file, indent=4)
-
- # Define the update_intensities function
- def update_intensities(event=None):
-     global new_data, new_data_df
-     new_data = []
-     selected_columns = []
-     for _, row in tabulator.value.iterrows():
-         marker = row['Marker']
-         if row['Cell']:
-             new_data.append({'Description': f"{marker}_Cell_Intensity_Average"})
-             selected_columns.append(f"{marker}_Cell_Intensity_Average")
-         if row['Cytoplasm']:
-             new_data.append({'Description': f"{marker}_Cytoplasm_Intensity_Average"})
-             selected_columns.append(f"{marker}_Cytoplasm_Intensity_Average")
-         if row['Nucleus']:
-             new_data.append({'Description': f"{marker}_Nucleus_Intensity_Average"})
-             selected_columns.append(f"{marker}_Nucleus_Intensity_Average")
-     new_data_df = pd.DataFrame(new_data)
-     new_data_table.value = new_data_df
-     update_stored_variables(selected_columns)
-     print("Updated intensities DataFrame:")
-     print(new_data_df)
-
- # Define the runner function
- async def runner(event):
-     update_intensities()
-
- # Bind the runner function to the button
- run_button.on_click(runner)
-
- # Attach the update_intensities function to changes in the Tabulator widget
- tabulator.param.watch(update_intensities, 'value')
-
- # Layout
- updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")
- pn.extension('tabulator')
- '''
- # Iterate over each column name
- for column in all_columns:
-     # Check if the column name contains 'Intensity_Average'
-     if 'Intensity_Average' in column:
-         # Split the column name by underscore
-         parts = column.split('_')
-
-         # Extract the word before the first underscore
-         marker = parts[0]
-
-         # Add the marker to the intensity_marker list
-         intensity_marker.append(marker)
-
- # Remove duplicates from the intensity_marker list
- intensity_marker = list(set(intensity_marker))
-
- print("Intensity Markers:")
- print(intensity_marker)
-
  # Create a callback function to update the intensities array
  def update_intensities(event):
      global intensities
@@ -608,6 +553,10 @@ def update_intensities(event):
      print("Updated intensities DataFrame:")
      print(intensities_df)

  tabulator_formatters = {
      'bool': {'type': 'tickCross'}
  }
@@ -626,6 +575,12 @@ tabulator.param.watch(update_intensities,'value')

  # Create a Panel layout with the Tabulator widget
  marker_options_layout = pn.Column(tabulator, sizing_mode="stretch_width")

  # Initialize the Panel extension with Tabulator
  pn.extension('tabulator')

@@ -682,13 +637,17 @@ run_button.on_click(runner)
  # Layout
  updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")

- pn.extension()'''
  # Serve the layout
  #updated_intensities.servable()


  intensities_df = new_data_table
  intensities_df = pn.pane.DataFrame(intensities_df)
  print(intensities_df)
  # ## I.4. QC CHECKS

@@ -745,6 +704,10 @@ def check_index_format(index_str, ls_samples):
      # If all checks pass, return True
      return True

  # Let's take a look at a few features to make sure our dataframe is as expected
  df.index
  def check_format_ofindex(index):
@@ -758,11 +721,19 @@ def check_format_ofindex(index):
      return index_format
  print(check_format_ofindex(df.index))

  df.shape
  check_index = df.index
  check_shape = df.shape
  print(check_shape)

  # Check for NaN entries (should not be any unless columns do not align)
  # False means no NaN entries
  # True means NaN entries
@@ -770,6 +741,10 @@ df.isnull().any().any()

  check_no_null = df.isnull().any().any()

  # Check that all expected files were imported into final dataframe
  if sorted(df.Sample_ID.unique()) == sorted(ls_samples):
      print("All expected filenames are present in big df Sample_ID column.")
@@ -780,6 +755,10 @@ else:

  print(df.Sample_ID)

  # Delete rows that have 0 value mean intensities for intensity columns
  print("df.shape before removing 0 mean values: ", df.shape)

@@ -846,6 +825,9 @@ for key, value in quality_check_results.items():
      print(f"{key}: {value}")


  import panel as pn
  import pandas as pd

@@ -943,6 +925,8 @@ def create_line_graph2(quantile):

      return p

  # Bind the create_line_graph function to the quantile slider
  nucleus_size_line_graph_with_histogram = pn.bind(create_line_graph2, quantile=quantile_slider.param.value)

@@ -967,8 +951,17 @@ qs = [quantile, 0.50, 1.00 - quantile]
  quantiles = df['Nucleus_Size'].quantile(q=qs).values
  threshold = quantiles[2]

  print(threshold)

  import panel as pn
  import pandas as pd
  import numpy as np
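
The hunk above keeps the quantile cut unchanged: for a slider value q, `qs = [q, 0.50, 1.00 - q]` asks pandas for the lower, median, and upper quantiles, and `quantiles[2]` (the 1 - q quantile) becomes the upper threshold. A standalone sketch of that pattern on synthetic data (assumes only pandas and numpy, not the script's own df):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    nucleus_size = pd.Series(rng.normal(loc=60, scale=10, size=1000), name="Nucleus_Size")

    quantile = 0.05
    qs = [quantile, 0.50, 1.00 - quantile]
    lower, median, upper = nucleus_size.quantile(q=qs).values

    # quantiles[2] in the script corresponds to `upper` here: the 95th-percentile cutoff.
    threshold = upper
    kept = nucleus_size[nucleus_size < threshold]
    print(f"lower={lower:.1f}, median={median:.1f}, threshold={threshold:.1f}, kept={len(kept)}")
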
@@ -1006,6 +999,10 @@ results_display = pn.bind(update_threshold_and_display, quantile_slider)
  # Layout the components in a Panel app
  layout2 = results_display

  print("Number of cells before filtering :", df.shape[0])
  cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"
  # Delete small cells and objects w/high AF555 Signal (RBCs)
@@ -1113,6 +1110,10 @@ def calculate_quantiles(column, quantile):
      quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
      return quantiles

  quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)

@@ -1122,10 +1123,30 @@ quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99,
  # Layout the components in a Panel app
  #nucleus_size_graph = pn.Column(nucleus_size_line_graph)

  len(intensities)

  df

  def calculate_cytoplasm_quantiles(column, quantile):
      # Print the columns of the DataFrame
      print("DataFrame columns:", df.columns)
@@ -1143,9 +1164,14 @@ def create_cytoplasm_intensity_df(column, quantile):
      return pn.pane.DataFrame(output)

  # Bind the create_app function to the quantile slider
- cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column=df.columns[10], quantile=quantile_slider.param.value)

  pn.Column(quantile_slider, cytoplasm_quantile_output_app)

  def calculate_cytoplasm_quantiles(column, quantile):
      quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
      return quantiles
@@ -1159,12 +1185,15 @@ def create_cytoplasm_intensity_df(column, quantile):


  # Bind the create_app function to the quantile slider
- cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column=df.columns[10], quantile = quantile_slider.param.value)
  pn.Column(quantile_slider,cytoplasm_quantile_output_app)


  # ## I.5. COLUMNS OF INTERESTS

  # Remove columns containing "DAPI"
  df = df[[x for x in df.columns.values if 'DAPI' not in x]]

@@ -1172,6 +1201,9 @@ print("Columns are now...")
  print([c for c in df.columns.values])


  # Create lists of full names and shortened names to use in plotting
  full_to_short_names, short_to_full_names = \
      shorten_feature_names(df.columns.values[~df.columns.isin(not_intensities)])
@@ -1179,6 +1211,9 @@ full_to_short_names, short_to_full_names = \
  short_to_full_names


  # Save this data to a metadata file
  filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")
  fh = open(filename, "w")
@@ -1189,6 +1224,10 @@ for k,v in full_to_short_names.items():
  fh.close()
  print("The full_to_short_column_names.csv file was created !")

  # Save this data to a metadata file
  filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")
  fh = open(filename, "w")
@@ -1202,11 +1241,18 @@ print("The short_to_full_column_names.csv file was created !")

  # ## I.6. EXPOSURE TIME


  #import the ashlar analysis file
  file_path = os.path.join(metadata_dir, 'combined_metadata.csv')
  ashlar_analysis = pd.read_csv(file_path)
  ashlar_analysis

  # Extracting and renaming columns
  new_df = ashlar_analysis[['Name', 'Cycle', 'ChannelIndex', 'ExposureTime']].copy()
  new_df.rename(columns={
@@ -1225,6 +1271,10 @@ new_df.to_csv('Ashlar_Exposure_Time.csv', index=False)
  # Print the new dataframe
  print(new_df)

  # Here, we want to end up with a data structure that incorporates metadata on each intensity marker column used in our big dataframe in an easy-to-use format.
  # This is going to include the full name of the intensity marker columns in the big data frame,
  # the corresponding round and channel,
@@ -1254,21 +1304,41 @@ else:
      print("\nNo null values detected.")

  if len(exp_df['Target']) > len(exp_df['Target'].unique()):
      print("One or more non-unique Target values in exp_df. Currently not supported.")
      exp_df = exp_df.drop_duplicates(subset = 'Target').reindex()

  # sort exp_df by the values in the 'Target' column in ascending order and then retrieve the first few rows of the sorted df
  exp_df.sort_values(by = ['Target']).head()

  # Create lowercase version of target
  exp_df['target_lower'] = exp_df['Target'].str.lower()
  exp_df.head()

  # Create df that contains marker intensity columns in our df that aren't in not_intensities
  intensities = pd.DataFrame({'full_column':df.columns.values[~df.columns.isin(not_intensities)]})

  intensities

  # Extract the marker information from the `full_column`, which corresponds to full column in big dataframe
  # Use regular expressions (regex) to isolate the part of the field that begins (^) with an alphanumeric value (W), and ends with an underscore (_)
  # '$' is end of line
@@ -1277,10 +1347,20 @@ intensities['marker'] = intensities['full_column'].str.extract(r'([^\W_]+)')
  intensities['marker_lower'] = intensities['marker'].str.lower()

  intensities

  # Subset the intensities df to exclude any column pertaining to DAPI
  intensities = intensities.loc[intensities['marker_lower'] != 'dapi']

  intensities.head()

  # Merge the intensities and exp_df together to create metadata
  metadata = pd.merge(exp_df, intensities, how = 'left', left_on = 'target_lower',right_on = 'marker_lower')
  metadata = metadata.drop(columns = ['marker_lower'])
@@ -1290,14 +1370,27 @@ metadata = metadata.dropna()
  # target_lower is Target in small caps
  # marker is the extracted first component of the full column in segmentation data, with corresponding capitalization
  metadata

  # Add a column to signify marker target localisation.
  # Use a lambda to determine segmented location of intensity marker column and update metadata accordingly
  # Using the add_metadata_location() function in my_modules.py
  metadata['localisation'] = metadata.apply(
      lambda row: add_metadata_location(row), axis = 1)

  mlid = metadata

  # Save this data structure to the metadata folder
  # don't want to add color in because that's better off treating color the same for round, channel, and sample
  filename = "marker_intensity_metadata.csv"
@@ -1336,6 +1429,10 @@ custom_colors_values = sb.palplot(sb.color_palette([custom_colors.get(ch, 'blue'
  print("Unique channels are:", metadata.Channel.unique())
  sb.palplot(sb.color_palette(channel_color_values))

  # Function to create a palette plot with custom colors
  def create_palette_plot():
      # Get unique channels
@@ -1398,6 +1495,9 @@ app_palette_plot = create_palette_plot(custom_colors)
  #app_palette_plot.servable()

  # Store in a dictionary
  channel_color_dict = dict(zip(metadata.Channel.unique(), channel_color_values))
  channel_color_dict
@@ -1406,6 +1506,10 @@ for k,v in channel_color_dict.items():

  channel_color_dict

  color_df_channel = color_dict_to_df(channel_color_dict, "Channel")

  # Save to file in metadata directory
@@ -1415,6 +1519,10 @@ color_df_channel.to_csv(filename, index = False)

  color_df_channel

  # Legend of channel info only
  g = plt.figure(figsize = (1,1)).add_subplot(111)
  g.axis('off')
@@ -1448,6 +1556,10 @@ sb.palplot(sb.color_palette(round_color_values))

  ## TO-DO: write what these parameters mean

  # Store in a dictionary
  round_color_dict = dict(zip(metadata.Round.unique(), round_color_values))

@@ -1456,6 +1568,10 @@ for k,v in round_color_dict.items():

  round_color_dict

  color_df_round = color_dict_to_df(round_color_dict, "Round")

  # Save to file in metadata directory
@@ -1485,6 +1601,9 @@ plt.savefig(filename, bbox_inches = 'tight')

  # ### I.7.3. SAMPLES COLORS

  # we want colors that are neither sequential nor categorical.
  # Categorical would be ideal if we could generate an arbitrary number of colors, but I do not think that we can.
  # Hence, we will choose `n` colors from a continuous palette. First we will generate the right number of colors. Later, we will assign TMA samples to gray.
@@ -1496,10 +1615,18 @@ color_values = sb.color_palette("husl",n_colors = len(ls_samples))#'HLS'
  # Display those unique colors
  sb.palplot(sb.color_palette(color_values))

  TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
  TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = "gray")
  sb.palplot(sb.color_palette(TMA_color_values))

  # Store in a dictionary
  color_dict = dict()
  color_dict = dict(zip(df.Sample_ID.unique(), color_values))
@@ -1615,34 +1742,16 @@ variable_widget = pn.widgets.Select(name="Target", value="Exp", options=list(met
  window_widget = pn.widgets.IntSlider(name="window", value=30, start=1, end=60)
  sigma_widget = pn.widgets.IntSlider(name="sigma", value=10, start=0, end=20)

- # Function to save files
- def save_files(event):
-     for sample in ls_samples:
-         sample_id = sample.split('.csv')[0]
-         filename = os.path.join(output_data_dir, sample_id + "_" + step_suffix + ".csv")
-
-         df_save = df.loc[df['Sample_ID'] == sample, :]
-         if os.path.exists(filename):
-             df_save.to_csv(filename, index=True, index_label='ID', mode='w')  # Overwrite by default
-             print(f"File {filename} was overwritten!")
-         else:
-             df_save.to_csv(filename, index=True, index_label='ID')  # Save normally if the file doesn't exist
-             print(f"File {filename} was created and saved!")
-
- # Button to download files
- download_button = pn.widgets.Button(name='Download Files', button_type='primary')
- download_button.on_click(save_files)
-
  app = pn.template.GoldenTemplate(
      site="Cyc-IF",
      title="Quality Control",
      main=[
          pn.Tabs(
              ("Dataframes", pn.Column(
-                 pn.Row(csv_files_button,pn.bind(handle_click, csv_files_button.param.clicks), ),
                  pn.pane.Markdown("### The Dataframe uploaded:"), pn.pane.DataFrame(intial_dataframe),
                  #pn.pane.Markdown("### The Exposure time DataFrame is :"), pn.pane.DataFrame(exp_df.head()),
-                 pn.pane.Markdown("### The DataFrame after merging CycIF data x metadata :"), pn.pane.DataFrame(merged_dataframe.head(25)),
              )),
              ("Quality Control", pn.Column(
                  quality_check(quality_control_df, not_intensities)
@@ -1656,17 +1765,19 @@ app = pn.template.GoldenTemplate(
              )),
              ("Plots", pn.Column(
                  #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(nucleus_size_line_graph_with_histogram, num_of_cell_removal),
-                 pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(plot1,layout2),
                  #pn.pane.Markdown("### Nucleus Distribution Plot:"), pn.Column(nucleus_size_plot, nucleus_size_graph),
                  pn.pane.Markdown(" ### Intensity Average Plot:"), pn.Row(selected_marker_plot,num_of_cell_removal_intensity ),
                  #pn.Column(pn.Column(column_dropdown, generate_plot_button), quantile_slider, plot),
                  #pn.pane.Markdown("### Cytoplasm Intensity Plot:"), cytoplasm_intensity_plot,
                  #pn.pane.Markdown("### AF555_Cell_Intensity_Average:"), quantile_output_app,
-                 #pn.pane.Markdown("### Distribution of AF555_Cell_Intensity_Average with Quantiles:"), quantile_intensity_plot),
-                 pn.Column(download_button),
              )),

          ),
      ])

- app.servable()

  import pandas as pd
  import numpy as np
  import json
  import matplotlib.pyplot as plt
  from bokeh.plotting import figure
  from bokeh.io import push_notebook, show
  from bokeh.models import ColumnDataSource, Button
  from my_modules import *
  from datasets import load_dataset
+ os.getcwd()
  #Silence FutureWarnings & UserWarnings
  warnings.filterwarnings('ignore', category= FutureWarning)
  warnings.filterwarnings('ignore', category= UserWarning)

+ #present_dir = os.path.dirname(os.path.realpath(__file__))
+ #input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
+ base_dir = '/code/wetransfer_data-zip_2024-05-17_1431'
+ set_path = 'test'
+ selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
+ ls_samples = ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']
+
  pn.extension()

  update_button = pn.widgets.Button(name='CSV Files', button_type='primary')
  def update_samples(event):
+     with open('stored_variables.json', 'r') as file:
          stored_vars = json.load(file)
+     # ls_samples = stored_vars['ls_samples']
+     print(ls_samples)
  update_button.on_click(update_samples)

  csv_files_button = pn.widgets.Button(icon="clipboard", button_type="primary")
  indicator = pn.indicators.LoadingSpinner(value=False, size=25)

  def handle_click(clicks):
+     with open('stored_variables.json', 'r') as file:
          stored_vars = json.load(file)
+     # ls_samples = stored_vars['ls_samples']
+     return f'CSV Files Selected: {ls_samples}'

+ pn.Row(
+     csv_files_button,
+     pn.bind(handle_click, csv_files_button.param.clicks),
+ )
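
Both new callbacks re-open `stored_variables.json` from the current working directory, which a later `os.chdir(input_data_dir)` changes, so a missing file raises FileNotFoundError inside the widget handler. A defensive sketch of the same load, with a hypothetical `load_stored_vars` helper and the hardcoded values above as fallbacks:

    import json
    import os

    def load_stored_vars(json_path, defaults):
        # Fall back to the hardcoded defaults when the JSON file is missing.
        if not os.path.exists(json_path):
            return dict(defaults)
        with open(json_path, 'r') as fh:
            stored = json.load(fh)
        # Stored values override defaults; extra keys pass through untouched.
        return {**defaults, **stored}

    defaults = {
        'base_dir': '/code/wetransfer_data-zip_2024-05-17_1431',
        'set_path': 'test',
        'ls_samples': ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv'],
    }
    stored_vars = load_stored_vars(os.path.abspath('stored_variables.json'), defaults)
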

  # ## I.2. *DIRECTORIES

+ set_path = 'test'

  # Set base directory

      print("The", d, "directory already exists !")

  os.chdir(input_data_dir)
+ with open('stored_variables.json', 'r') as file:
      stored_vars = json.load(file)
      # ls_samples = stored_vars['ls_samples']
      selected_metadata_files = stored_vars['selected_metadata_files']

  #ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(".csv")]
  print("The following CSV files were detected:\n\n",[sample for sample in ls_samples], "\n\nin", input_data_dir, "directory.")

+ # In[26]:
+
+ import os
+ import pandas as pd
+
  def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
      if len(selected_metadata_files) == []:
          if not file:

          return combined_metadata_df

      else:
+         if selected_metadata_files:
              single_file_path = os.path.join(metadata_dir, selected_metadata_files[0])
              single_file_df = pd.read_csv(single_file_path)
              print(f"Only one file selected: {selected_metadata_files[0]}")
+             return single_file_df
+         else:
+             print("No metadata files selected.")
+             return pd.DataFrame()
+
+ # In[27]:
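
One pre-existing bug survives this rewrite: `if len(selected_metadata_files) == []` compares an integer with a list, so it is always False and that branch is dead code. A sketch of the check the function presumably intends (hypothetical `combine_metadata` name, assuming only pandas):

    import os
    import pandas as pd

    def combine_metadata(metadata_dir, selected_metadata_files):
        # An empty list is falsy; this is what `len(...) == []` was presumably meant to test.
        if not selected_metadata_files:
            print("No metadata files selected.")
            return pd.DataFrame()
        frames = [pd.read_csv(os.path.join(metadata_dir, f)) for f in selected_metadata_files]
        return pd.concat(frames, ignore_index=True)
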

  print(combine_and_save_metadata_files(metadata_dir, selected_metadata_files))

+ # In[28]:
+
  ls_samples

+ # In[29]:
  path = os.path.join(input_data_dir, ls_samples[0])
  #df = load_dataset('csv', data_files = path )
  df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]),index_col = 0, nrows = 1)
  df.head(10)

+ # In[30]:
+
  # First gather information on expected headers using first file in ls_samples
  # Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
  df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)
 
  print("df's columns :\n", df.columns, "\n")
  print("df's index :\n", df.index, "\n")
  print("df's index name :\n", df.index.name)
+
+ # In[31]:
+
  df.head()
+
+ # In[32]:
+
  # Verify that the ID column in input file became the index
  # Verify that the index name column is "ID", if not, rename it
  if df.index.name != "ID":

  print("df's columns :\n", df.columns, "\n")
  print("df's index :\n", df.index, "\n")
  print("df's index name :\n", df.index.name)
+
+ # In[33]:
+
+ df.head()
+
+ # In[34]:
+
+ df.head()
+
+ # In[35]:
+
  print("Used " + ls_samples[0] + " to determine the expected and corrected headers for all files.\n")
  print("These headers are: \n" + ", ".join([h for h in expected_headers]))

  corrected_headers = True

+ # In[36]:
+
  for sample in ls_samples:
      file_path = os.path.join(input_data_dir,sample)
      print(file_path)

+ # In[37]:
+
  # Import all the others files
  dfs = {}
  ###############################
 
      file_not_intensities.write(item + "\n")
  file_not_intensities.close()

+ # In[46]:
+
  not_intensities_df = pd.read_csv(path_not_intensities)
  not_intensities_df

+ # In[47]:
+
  # Columns we want to keep: not_intensities, and any intensity column that contains 'Intensity_Average' (drop any intensity marker column that is not a mean intensity)
  to_keep = not_intensities + [x for x in df.columns.values[~df.columns.isin(not_intensities)] if 'Intensity_Average' in x]

  to_keep

+ # In[48]:
+
  print(len(to_keep) - 1)

+ # In[49]:
+
  # However, our to_keep list contains items that might not be in our df headers!
  # These items are from our not_intensities list. So let's ask for only those items from to_keep that are actually found in our df
  # Retains only the columns from the to_keep list that are found in the df's headers (columns).

  df.head()

+ # In[50]:
+
+ import pandas as pd
+
+ # Assuming you have a DataFrame named 'df'
+ # df = pd.read_csv('your_file.csv')

  # Get all column names
  all_columns = df.columns.tolist()
 
  print("Intensity Markers:")
  print(intensity_marker)

  # Create a callback function to update the intensities array
  def update_intensities(event):
      global intensities
 
      print("Updated intensities DataFrame:")
      print(intensities_df)

+ # In[54]:
+
  tabulator_formatters = {
      'bool': {'type': 'tickCross'}
  }

  # Create a Panel layout with the Tabulator widget
  marker_options_layout = pn.Column(tabulator, sizing_mode="stretch_width")
+
+ import panel as pn
+ import pandas as pd
+ import random
+ import asyncio
+
  # Initialize the Panel extension with Tabulator
  pn.extension('tabulator')

  # Layout
  updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")

+ pn.extension()
  # Serve the layout
  #updated_intensities.servable()

  intensities_df = new_data_table
+ intensities_df
+
  intensities_df = pn.pane.DataFrame(intensities_df)
+ intensities_df
+
  print(intensities_df)
  # ## I.4. QC CHECKS

      # If all checks pass, return True
      return True

+ # In[70]:
+
  # Let's take a look at a few features to make sure our dataframe is as expected
  df.index
  def check_format_ofindex(index):

      return index_format
  print(check_format_ofindex(df.index))

+ # In[71]:
+
  df.shape
  check_index = df.index
  check_shape = df.shape
  print(check_shape)

+ # In[72]:
+
  # Check for NaN entries (should not be any unless columns do not align)
  # False means no NaN entries
  # True means NaN entries

  check_no_null = df.isnull().any().any()

+ # In[73]:
+
  # Check that all expected files were imported into final dataframe
  if sorted(df.Sample_ID.unique()) == sorted(ls_samples):
      print("All expected filenames are present in big df Sample_ID column.")

  print(df.Sample_ID)

+ # In[74]:
+
  # Delete rows that have 0 value mean intensities for intensity columns
  print("df.shape before removing 0 mean values: ", df.shape)

  print(f"{key}: {value}")

+ # In[80]:
+
  import panel as pn
  import pandas as pd

      return p

  # Bind the create_line_graph function to the quantile slider
  nucleus_size_line_graph_with_histogram = pn.bind(create_line_graph2, quantile=quantile_slider.param.value)

  quantiles = df['Nucleus_Size'].quantile(q=qs).values
  threshold = quantiles[2]

+ # In[89]:
+
  print(threshold)

+ # In[90]:
+
  import panel as pn
  import pandas as pd
  import numpy as np
 
  # Layout the components in a Panel app
  layout2 = results_display

+ # In[91]:
+
  print("Number of cells before filtering :", df.shape[0])
  cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"
  # Delete small cells and objects w/high AF555 Signal (RBCs)

      quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
      return quantiles

+ # In[105]:
+
  quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)

  # Layout the components in a Panel app
  #nucleus_size_graph = pn.Column(nucleus_size_line_graph)

+ # In[106]:
+
+ #df["CKs_Cytoplasm_Intensity_Average"].quantile(q=qs)
+
+ # In[107]:
+
  len(intensities)
+ if 'CKs_Cytoplasm_Intensity_Average' in intensities:
+     print(1)
+
+ # In[108]:
+
  df

+ # In[109]:
+
  def calculate_cytoplasm_quantiles(column, quantile):
      # Print the columns of the DataFrame
      print("DataFrame columns:", df.columns)

      return pn.pane.DataFrame(output)

  # Bind the create_app function to the quantile slider
+ cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column='CKs_Cytoplasm_Intensity_Average', quantile=quantile_slider.param.value)

  pn.Column(quantile_slider, cytoplasm_quantile_output_app)

+ # In[110]:
+
  def calculate_cytoplasm_quantiles(column, quantile):
      quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
      return quantiles

  # Bind the create_app function to the quantile slider
+ cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column='CKs_Cytoplasm_Intensity_Average', quantile = quantile_slider.param.value)
  pn.Column(quantile_slider,cytoplasm_quantile_output_app)
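
The cells above swap the positional `df.columns[10]` for an explicit column name and rely on `pn.bind`, which wraps `create_cytoplasm_intensity_df` so Panel re-evaluates it whenever the slider moves. A self-contained sketch of that reactive pattern on toy data (hypothetical names, assuming Panel and pandas):

    import pandas as pd
    import panel as pn

    pn.extension()

    toy = pd.DataFrame({'CKs_Cytoplasm_Intensity_Average': range(100)})

    def quantile_table(column, quantile):
        # Re-run by Panel each time the bound slider value changes.
        qs = toy[column].quantile(q=[quantile, 0.50, 1 - quantile])
        return pn.pane.DataFrame(qs.to_frame(name=column))

    slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)
    bound = pn.bind(quantile_table, column='CKs_Cytoplasm_Intensity_Average', quantile=slider)
    layout = pn.Column(slider, bound)
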

  # ## I.5. COLUMNS OF INTERESTS

+ # In[111]:
+
  # Remove columns containing "DAPI"
  df = df[[x for x in df.columns.values if 'DAPI' not in x]]

  print([c for c in df.columns.values])

+ # In[112]:
+
  # Create lists of full names and shortened names to use in plotting
  full_to_short_names, short_to_full_names = \
      shorten_feature_names(df.columns.values[~df.columns.isin(not_intensities)])

  short_to_full_names

+ # In[113]:
+
  # Save this data to a metadata file
  filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")
  fh = open(filename, "w")

  fh.close()
  print("The full_to_short_column_names.csv file was created !")

+ # In[114]:
+
  # Save this data to a metadata file
  filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")
  fh = open(filename, "w")

  # ## I.6. EXPOSURE TIME

+ # In[115]:
+
  #import the ashlar analysis file
  file_path = os.path.join(metadata_dir, 'combined_metadata.csv')
  ashlar_analysis = pd.read_csv(file_path)
  ashlar_analysis
+
+ # In[116]:
+
  # Extracting and renaming columns
  new_df = ashlar_analysis[['Name', 'Cycle', 'ChannelIndex', 'ExposureTime']].copy()
  new_df.rename(columns={

  # Print the new dataframe
  print(new_df)

+ # In[117]:
+
  # Here, we want to end up with a data structure that incorporates metadata on each intensity marker column used in our big dataframe in an easy-to-use format.
  # This is going to include the full name of the intensity marker columns in the big data frame,
  # the corresponding round and channel,

      print("\nNo null values detected.")

+ # In[118]:
+
  if len(exp_df['Target']) > len(exp_df['Target'].unique()):
      print("One or more non-unique Target values in exp_df. Currently not supported.")
      exp_df = exp_df.drop_duplicates(subset = 'Target').reindex()

+ # In[119]:
+
  # sort exp_df by the values in the 'Target' column in ascending order and then retrieve the first few rows of the sorted df
  exp_df.sort_values(by = ['Target']).head()

+ # In[120]:
+
  # Create lowercase version of target
  exp_df['target_lower'] = exp_df['Target'].str.lower()
  exp_df.head()

+ # In[121]:
+
  # Create df that contains marker intensity columns in our df that aren't in not_intensities
  intensities = pd.DataFrame({'full_column':df.columns.values[~df.columns.isin(not_intensities)]})

  intensities
+
+ # In[122]:
+
  # Extract the marker information from the `full_column`, which corresponds to full column in big dataframe
  # Use regular expressions (regex) to isolate the part of the field that begins (^) with an alphanumeric value (W), and ends with an underscore (_)
  # '$' is end of line

  intensities['marker_lower'] = intensities['marker'].str.lower()

  intensities
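
As the In[122] comments describe, `str.extract(r'([^\W_]+)')` pulls the leading run of word characters (letters and digits, no underscore) off each column name, i.e. the marker. A quick check of that behaviour on toy column names:

    import pandas as pd

    cols = pd.DataFrame({'full_column': ['CKs_Cytoplasm_Intensity_Average', 'AF555_Cell_Intensity_Average']})
    # ([^\W_]+) matches up to, but not including, the first underscore.
    cols['marker'] = cols['full_column'].str.extract(r'([^\W_]+)', expand=False)
    print(cols)  # markers: CKs, AF555
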
+
+ # In[123]:
+
  # Subset the intensities df to exclude any column pertaining to DAPI
  intensities = intensities.loc[intensities['marker_lower'] != 'dapi']

  intensities.head()
+
+ # In[124]:
+
  # Merge the intensities and exp_df together to create metadata
  metadata = pd.merge(exp_df, intensities, how = 'left', left_on = 'target_lower',right_on = 'marker_lower')
  metadata = metadata.drop(columns = ['marker_lower'])

  # target_lower is Target in small caps
  # marker is the extracted first component of the full column in segmentation data, with corresponding capitalization
  metadata
+
+ # In[125]:
+
  # Add a column to signify marker target localisation.
  # Use a lambda to determine segmented location of intensity marker column and update metadata accordingly
  # Using the add_metadata_location() function in my_modules.py
  metadata['localisation'] = metadata.apply(
      lambda row: add_metadata_location(row), axis = 1)

+ # In[126]:
+
  mlid = metadata

+ # In[127]:
+
  # Save this data structure to the metadata folder
  # don't want to add color in because that's better off treating color the same for round, channel, and sample
  filename = "marker_intensity_metadata.csv"

  print("Unique channels are:", metadata.Channel.unique())
  sb.palplot(sb.color_palette(channel_color_values))

+ # In[131]:
+
  # Function to create a palette plot with custom colors
  def create_palette_plot():
      # Get unique channels

  #app_palette_plot.servable()

+ # In[133]:
+
  # Store in a dictionary
  channel_color_dict = dict(zip(metadata.Channel.unique(), channel_color_values))
  channel_color_dict

  channel_color_dict

+ # In[134]:
+
  color_df_channel = color_dict_to_df(channel_color_dict, "Channel")

  # Save to file in metadata directory

  color_df_channel

+ # In[135]:
+
  # Legend of channel info only
  g = plt.figure(figsize = (1,1)).add_subplot(111)
  g.axis('off')

  ## TO-DO: write what these parameters mean

+ # In[137]:
+
  # Store in a dictionary
  round_color_dict = dict(zip(metadata.Round.unique(), round_color_values))

  round_color_dict

+ # In[138]:
+
  color_df_round = color_dict_to_df(round_color_dict, "Round")

  # Save to file in metadata directory

  # ### I.7.3. SAMPLES COLORS

+ # In[140]:
+
  # we want colors that are neither sequential nor categorical.
  # Categorical would be ideal if we could generate an arbitrary number of colors, but I do not think that we can.
  # Hence, we will choose `n` colors from a continuous palette. First we will generate the right number of colors. Later, we will assign TMA samples to gray.

  # Display those unique colors
  sb.palplot(sb.color_palette(color_values))

+ # In[141]:
+
  TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
  TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = "gray")
  sb.palplot(sb.color_palette(TMA_color_values))

+ # In[142]:
+
  # Store in a dictionary
  color_dict = dict()
  color_dict = dict(zip(df.Sample_ID.unique(), color_values))

  window_widget = pn.widgets.IntSlider(name="window", value=30, start=1, end=60)
  sigma_widget = pn.widgets.IntSlider(name="sigma", value=10, start=0, end=20)

  app = pn.template.GoldenTemplate(
      site="Cyc-IF",
      title="Quality Control",
      main=[
          pn.Tabs(
              ("Dataframes", pn.Column(
+                 pn.Row(csv_files_button,pn.bind(handle_click, csv_files_button.param.clicks)),
                  pn.pane.Markdown("### The Dataframe uploaded:"), pn.pane.DataFrame(intial_dataframe),
                  #pn.pane.Markdown("### The Exposure time DataFrame is :"), pn.pane.DataFrame(exp_df.head()),
+                 pn.pane.Markdown("### The DataFrame after merging CycIF data x metadata :"), pn.pane.DataFrame(merged_dataframe.head()),
              )),
              ("Quality Control", pn.Column(
                  quality_check(quality_control_df, not_intensities)
              )),
              ("Plots", pn.Column(
                  #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(nucleus_size_line_graph_with_histogram, num_of_cell_removal),
+                 #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(plot1,layout2),
                  #pn.pane.Markdown("### Nucleus Distribution Plot:"), pn.Column(nucleus_size_plot, nucleus_size_graph),
                  pn.pane.Markdown(" ### Intensity Average Plot:"), pn.Row(selected_marker_plot,num_of_cell_removal_intensity ),
                  #pn.Column(pn.Column(column_dropdown, generate_plot_button), quantile_slider, plot),
                  #pn.pane.Markdown("### Cytoplasm Intensity Plot:"), cytoplasm_intensity_plot,
                  #pn.pane.Markdown("### AF555_Cell_Intensity_Average:"), quantile_output_app,
+                 #pn.pane.Markdown("### Distribution of AF555_Cell_Intensity_Average with Quantiles:"), quantile_intensity_plot)
              )),

          ),
      ])

+ app.servable()
+
+ if __name__ == "__main__":
+     pn.serve(app, port=5007)
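
The new ending both registers the template and serves it directly. As a rough guide (assuming standard Panel behaviour; the toy app below stands in for the GoldenTemplate), `.servable()` is what `panel serve Quality_Control.py` picks up, while the `__main__` block lets plain `python Quality_Control.py` start its own Bokeh server:

    import panel as pn

    pn.extension()
    demo = pn.Column('# Quality Control')

    # `panel serve this_script.py` renders anything marked .servable().
    demo.servable()

    if __name__ == '__main__':
        # `python this_script.py` takes this path instead and serves standalone.
        pn.serve(demo, port=5007, show=False)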