Update Quality_Control.py

Quality_Control.py  CHANGED  (+296 −185)
@@ -12,12 +12,6 @@ import hvplot.pandas
 import pandas as pd
 import numpy as np
 import json
-import panel as pn
-import pandas as pd
-import os
-import pandas as pd
-import random
-import asyncio
 import matplotlib.pyplot as plt
 from bokeh.plotting import figure
 from bokeh.io import push_notebook, show
@@ -29,56 +23,47 @@ from bokeh.models import Span, Label
 from bokeh.models import ColumnDataSource, Button
 from my_modules import *
 from datasets import load_dataset
-
+os.getcwd()
 #Silence FutureWarnings & UserWarnings
 warnings.filterwarnings('ignore', category= FutureWarning)
 warnings.filterwarnings('ignore', category= UserWarning)
 
-#input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
-present_dir = os.path.dirname(os.path.realpath(__file__))
-# Construct the full path to the stored_variables.json file
-json_path = os.path.join(present_dir, 'stored_variables.json')
-with open(json_path, 'r') as file:
-    stored_vars = json.load(file)
-directory = stored_vars['base_dir']
-input_path = os.path.join(present_dir,directory)
-set_path = stored_vars['set_path']
-selected_metadata_files = stored_vars['selected_metadata_files']
-ls_samples = stored_vars['ls_samples']
-base_dir = input_path
+
+#present_dir = os.path.dirname(os.path.realpath(__file__))
+#input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
+base_dir = '/code/wetransfer_data-zip_2024-05-17_1431'
+set_path = 'test'
+selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
+ls_samples = ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']
+
 
-#
-#
-
-
 pn.extension()
 
 update_button = pn.widgets.Button(name='CSV Files', button_type='primary')
 def update_samples(event):
-    with open(
+    with open('stored_variables.json', 'r') as file:
         stored_vars = json.load(file)
-
-
-    return f'CSV Files Selected: {ls_samples}'
+        # ls_samples = stored_vars['ls_samples']
+    print(ls_samples)
 update_button.on_click(update_samples)
 
 csv_files_button = pn.widgets.Button(icon="clipboard", button_type="primary")
 indicator = pn.indicators.LoadingSpinner(value=False, size=25)
 
 def handle_click(clicks):
-    with open(
+    with open('stored_variables.json', 'r') as file:
         stored_vars = json.load(file)
-
-
-    #return f'CSV Files Selected: {ls_samples}'
+        # ls_samples = stored_vars['ls_samples']
+    return f'CSV Files Selected: {ls_samples}'
 
-
-
+pn.Row(
+    csv_files_button,
+    pn.bind(handle_click, csv_files_button.param.clicks),
+)
 
 
 # ## I.2. *DIRECTORIES
 
-
+set_path = 'test'
 
 # Set base directory
 
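Note: the rewritten `update_samples`/`handle_click` pair leans on `pn.bind` — binding a function to the button's `clicks` parameter re-runs it, and re-renders its return value, on every press. A minimal self-contained sketch of that pattern (the widget and function names here are illustrative, not the app's):

import panel as pn

pn.extension()

button = pn.widgets.Button(name='CSV Files', button_type='primary')

def handle_click(clicks):
    # Re-executed each time the button is pressed; `clicks` is the running count.
    return f'Button pressed {clicks} times'

pn.Row(button, pn.bind(handle_click, button.param.clicks)).servable()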
@@ -132,7 +117,7 @@ for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata
         print("The", d, "directory already exists !")
 
 os.chdir(input_data_dir)
-with open(
+with open('stored_variables.json', 'r') as file:
     stored_vars = json.load(file)
     # ls_samples = stored_vars['ls_samples']
     selected_metadata_files = stored_vars['selected_metadata_files']
@@ -180,6 +165,13 @@ print('metadata_images_dir :', metadata_images_dir)
 #ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(".csv")]
 print("The following CSV files were detected:\n\n",[sample for sample in ls_samples], "\n\nin", input_data_dir, "directory.")
 
+
+# In[26]:
+
+
+import os
+import pandas as pd
+
 def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
     if len(selected_metadata_files) == []:
         if not file:
@@ -200,44 +192,38 @@ def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
         return combined_metadata_df
 
     else:
-
+        if selected_metadata_files:
             single_file_path = os.path.join(metadata_dir, selected_metadata_files[0])
             single_file_df = pd.read_csv(single_file_path)
             print(f"Only one file selected: {selected_metadata_files[0]}")
-
-
-
-
-
-
-
-
-            combined_metadata_df = pd.read_csv(combined_metadata_path)
-        else:
-            if selected_metadata_files:
-                combined_metadata_df = pd.DataFrame()
-                for file in selected_metadata_files:
-                    file_path = os.path.join(metadata_dir, file)
-                    metadata_df = pd.read_csv(file_path)
-                    combined_metadata_df = pd.concat([combined_metadata_df, metadata_df], ignore_index=True)
-
-                combined_metadata_df.to_csv(combined_metadata_path, index=False)
-                print(f"Combined metadata saved to: {combined_metadata_path}")
-            else:
-                print("No metadata files selected.")
-                combined_metadata_df = pd.DataFrame()
-
-    return combined_metadata_df
+            return single_file_df
+        else:
+            print("No metadata files selected.")
+            return pd.DataFrame()
+
+
+# In[27]:
+
 
 print(combine_and_save_metadata_files(metadata_dir, selected_metadata_files))
 
+
+# In[28]:
+
+
 ls_samples
 
+
+# In[29]:
 path = os.path.join(input_data_dir, ls_samples[0])
 #df = load_dataset('csv', data_files = path )
 df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]),index_col = 0, nrows = 1)
 df.head(10)
 
+
+# In[30]:
+
+
 # First gather information on expected headers using first file in ls_samples
 # Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
 df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)
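One caveat kept as context above: `if len(selected_metadata_files) == []:` compares an integer with an empty list, so it is always False and the empty-selection branch is unreachable, while the single-file branch now runs for any non-empty selection. A corrected sketch of the intended logic, assuming the same inputs (this is not the committed code):

import os
import pandas as pd

def combine_metadata(metadata_dir, selected_metadata_files):
    if not selected_metadata_files:           # empty list or None
        print("No metadata files selected.")
        return pd.DataFrame()
    if len(selected_metadata_files) == 1:     # single file: read it directly
        print(f"Only one file selected: {selected_metadata_files[0]}")
        return pd.read_csv(os.path.join(metadata_dir, selected_metadata_files[0]))
    # Several files: concatenate them row-wise.
    frames = [pd.read_csv(os.path.join(metadata_dir, f)) for f in selected_metadata_files]
    return pd.concat(frames, ignore_index=True)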
@@ -248,7 +234,17 @@ print("df :\n", df.head(), "\n")
 print("df's columns :\n", df.columns, "\n")
 print("df's index :\n", df.index, "\n")
 print("df's index name :\n", df.index.name)
+
+
+# In[31]:
+
+
 df.head()
+
+
+# In[32]:
+
+
 # Verify that the ID column in input file became the index
 # Verify that the index name column is "ID", if not, rename it
 if df.index.name != "ID":
@@ -276,15 +272,40 @@ print("\ndf :\n", df.head(), "\n")
 print("df's columns :\n", df.columns, "\n")
 print("df's index :\n", df.index, "\n")
 print("df's index name :\n", df.index.name)
+
+
+# In[33]:
+
+
+df.head()
+
+
+# In[34]:
+
+
+df.head()
+
+
+# In[35]:
+
+
 print("Used " + ls_samples[0] + " to determine the expected and corrected headers for all files.\n")
 print("These headers are: \n" + ", ".join([h for h in expected_headers]))
 
 corrected_headers = True
+
+# In[36]:
+
+
 
 for sample in ls_samples:
     file_path = os.path.join(input_data_dir,sample)
     print(file_path)
 
+
+# In[37]:
+
+
 # Import all the others files
 dfs = {}
 ###############################
@@ -439,16 +460,32 @@ else:
         file_not_intensities.write(item + "\n")
     file_not_intensities.close()
 
+
+# In[46]:
+
+
 not_intensities_df = pd.read_csv(path_not_intensities)
 not_intensities_df
 
+
+# In[47]:
+
+
 # Columns we want to keep: not_intensities, and any intensity column that contains 'Intensity_Average' (drop any intensity marker column that is not a mean intensity)
 to_keep = not_intensities + [x for x in df.columns.values[~df.columns.isin(not_intensities)] if 'Intensity_Average' in x]
 
 to_keep
 
+
+# In[48]:
+
+
 print(len(to_keep) - 1)
 
+
+# In[49]:
+
+
 # However, our to_keep list contains items that might not be in our df headers!
 # These items are from our not_intensities list. So let's ask for only those items from to_keep that are actually found in our df
 # Retains only the columns from the to_keep list that are found in the df's headers (columns).
@@ -458,17 +495,14 @@ df = df[[x for x in to_keep if x in df.columns.values]]
 
 df.head()
 
-# Assuming you have a DataFrame named 'df'
-# df = pd.read_csv('your_file.csv')
 
-#
-json_file_path = os.path.join(present_dir,"stored_variables.json")
+# In[50]:
 
-
-
-
-
-
+
+import pandas as pd
+
+# Assuming you have a DataFrame named 'df'
+# df = pd.read_csv('your_file.csv')
 
 # Get all column names
 all_columns = df.columns.tolist()
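The code that follows derives `intensity_marker` by scanning `all_columns`; the commented-out loop deleted further down did the same by splitting each `*_Intensity_Average` header on underscores and keeping the first token. A compact equivalent, assuming headers shaped like `CD45_Cell_Intensity_Average` (the data here is illustrative):

# Derive unique marker names from intensity column headers (illustrative data).
all_columns = ['CD45_Cell_Intensity_Average', 'CD45_Nucleus_Intensity_Average',
               'CKs_Cytoplasm_Intensity_Average', 'Nucleus_Size']
intensity_marker = sorted({c.split('_')[0] for c in all_columns
                           if 'Intensity_Average' in c})
print(intensity_marker)  # ['CD45', 'CKs']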
@@ -495,95 +529,6 @@ intensity_marker = list(set(intensity_marker))
 print("Intensity Markers:")
 print(intensity_marker)
 
-# Create a DataFrame with the intensity markers and default values
-marker_options_df = pd.DataFrame({
-    'Marker': intensity_marker,
-    'Cell': [True] * len(intensity_marker),
-    'Cytoplasm': [False] * len(intensity_marker),
-    'Nucleus': [False] * len(intensity_marker)
-})
-
-# Define formatters for the Tabulator widget
-tabulator_formatters = {
-    'Cell': {'type': 'tickCross'},
-    'Cytoplasm': {'type': 'tickCross'},
-    'Nucleus': {'type': 'tickCross'}
-}
-
-# Create the Tabulator widget
-tabulator = pn.widgets.Tabulator(marker_options_df, formatters=tabulator_formatters, sizing_mode='stretch_width')
-
-# Create a DataFrame to store the initial intensities
-new_data = [{'Description': f"{marker}_Cell_Intensity_Average"} for marker in intensity_marker if True]
-new_data_df = pd.DataFrame(new_data)
-
-# Create a widget to display the new data as a DataFrame
-new_data_table = pn.widgets.Tabulator(new_data_df, name='New Data Table', sizing_mode='stretch_width')
-
-# Create a button to start the update process
-run_button = pn.widgets.Button(name="Save Selection", button_type='primary')
-
-# Function to update stored_variables.json
-def update_stored_variables(selected_columns):
-    stored_variables["selected_intensities"] = selected_columns
-    with open(json_file_path, "w") as file:
-        json.dump(stored_variables, file, indent=4)
-
-# Define the update_intensities function
-def update_intensities(event=None):
-    global new_data, new_data_df
-    new_data = []
-    selected_columns = []
-    for _, row in tabulator.value.iterrows():
-        marker = row['Marker']
-        if row['Cell']:
-            new_data.append({'Description': f"{marker}_Cell_Intensity_Average"})
-            selected_columns.append(f"{marker}_Cell_Intensity_Average")
-        if row['Cytoplasm']:
-            new_data.append({'Description': f"{marker}_Cytoplasm_Intensity_Average"})
-            selected_columns.append(f"{marker}_Cytoplasm_Intensity_Average")
-        if row['Nucleus']:
-            new_data.append({'Description': f"{marker}_Nucleus_Intensity_Average"})
-            selected_columns.append(f"{marker}_Nucleus_Intensity_Average")
-    new_data_df = pd.DataFrame(new_data)
-    new_data_table.value = new_data_df
-    update_stored_variables(selected_columns)
-    print("Updated intensities DataFrame:")
-    print(new_data_df)
-
-# Define the runner function
-async def runner(event):
-    update_intensities()
-
-# Bind the runner function to the button
-run_button.on_click(runner)
-
-# Attach the update_intensities function to changes in the Tabulator widget
-tabulator.param.watch(update_intensities, 'value')
-
-# Layout
-updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")
-pn.extension('tabulator')
-'''
-# Iterate over each column name
-for column in all_columns:
-    # Check if the column name contains 'Intensity_Average'
-    if 'Intensity_Average' in column:
-        # Split the column name by underscore
-        parts = column.split('_')
-
-        # Extract the word before the first underscore
-        marker = parts[0]
-
-        # Add the marker to the intensity_marker list
-        intensity_marker.append(marker)
-
-# Remove duplicates from the intensity_marker list
-intensity_marker = list(set(intensity_marker))
-
-print("Intensity Markers:")
-print(intensity_marker)
-
 # Create a callback function to update the intensities array
 def update_intensities(event):
     global intensities
@@ -608,6 +553,10 @@ def update_intensities(event):
     print("Updated intensities DataFrame:")
     print(intensities_df)
 
+
+# In[54]:
+
+
 tabulator_formatters = {
     'bool': {'type': 'tickCross'}
 }
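The `tickCross` formatter kept here renders a boolean column as a check/cross cell in a Tabulator table; the block deleted above used the same idea per marker compartment. A minimal sketch (marker names illustrative):

import panel as pn
import pandas as pd

pn.extension('tabulator')

marker_options_df = pd.DataFrame({
    'Marker': ['CD45', 'CKs'],
    'Cell': [True, True],
    'Cytoplasm': [False, False],
    'Nucleus': [False, False],
})
# Render each boolean column as an editable tick/cross cell.
tabulator = pn.widgets.Tabulator(
    marker_options_df,
    formatters={c: {'type': 'tickCross'} for c in ('Cell', 'Cytoplasm', 'Nucleus')},
    sizing_mode='stretch_width',
)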
@@ -626,6 +575,12 @@ tabulator.param.watch(update_intensities,'value')
 
 # Create a Panel layout with the Tabulator widget
 marker_options_layout = pn.Column(tabulator, sizing_mode="stretch_width")
+
+import panel as pn
+import pandas as pd
+import random
+import asyncio
+
 # Initialize the Panel extension with Tabulator
 pn.extension('tabulator')
 
@@ -682,13 +637,17 @@ run_button.on_click(runner)
 # Layout
 updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")
 
-pn.extension()
+pn.extension()
 # Serve the layout
 #updated_intensities.servable()
 
 
 intensities_df = new_data_table
+intensities_df
+
 intensities_df = pn.pane.DataFrame(intensities_df)
+intensities_df
+
 print(intensities_df)
 # ## I.4. QC CHECKS
 
@@ -745,6 +704,10 @@ def check_index_format(index_str, ls_samples):
     # If all checks pass, return True
     return True
 
+
+# In[70]:
+
+
 # Let's take a look at a few features to make sure our dataframe is as expected
 df.index
 def check_format_ofindex(index):
@@ -758,11 +721,19 @@ def check_format_ofindex(index):
     return index_format
 print(check_format_ofindex(df.index))
 
+
+# In[71]:
+
+
 df.shape
 check_index = df.index
 check_shape = df.shape
 print(check_shape)
 
+
+# In[72]:
+
+
 # Check for NaN entries (should not be any unless columns do not align)
 # False means no NaN entries
 # True means NaN entries
@@ -770,6 +741,10 @@ df.isnull().any().any()
 
 check_no_null = df.isnull().any().any()
 
+
+# In[73]:
+
+
 # Check that all expected files were imported into final dataframe
 if sorted(df.Sample_ID.unique()) == sorted(ls_samples):
     print("All expected filenames are present in big df Sample_ID column.")
@@ -780,6 +755,10 @@ else:
 
 print(df.Sample_ID)
 
+
+# In[74]:
+
+
 # Delete rows that have 0 value mean intensities for intensity columns
 print("df.shape before removing 0 mean values: ", df.shape)
 
@@ -846,6 +825,9 @@ for key, value in quality_check_results.items():
     print(f"{key}: {value}")
 
 
+# In[80]:
+
+
 import panel as pn
 import pandas as pd
 
@@ -943,6 +925,8 @@ def create_line_graph2(quantile):
 
     return p
 
+
+
 # Bind the create_line_graph function to the quantile slider
 nucleus_size_line_graph_with_histogram = pn.bind(create_line_graph2, quantile=quantile_slider.param.value)
 
@@ -967,8 +951,17 @@ qs = [quantile, 0.50, 1.00 - quantile]
 quantiles = df['Nucleus_Size'].quantile(q=qs).values
 threshold = quantiles[2]
 
+
+# In[89]:
+
+
 print(threshold)
 
+
+# In[90]:
+
+
+
 import panel as pn
 import pandas as pd
 import numpy as np
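The `qs = [quantile, 0.50, 1.00 - quantile]` layout means `quantiles[0]` is the lower cutoff, `quantiles[1]` the median, and `quantiles[2]` (used as `threshold`) the upper cutoff. A worked example on synthetic data:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({'Nucleus_Size': rng.normal(100, 15, 1_000)})

quantile = 0.05
qs = [quantile, 0.50, 1.00 - quantile]
quantiles = df['Nucleus_Size'].quantile(q=qs).values
threshold = quantiles[2]  # the 0.95 quantile here
print(threshold, (df['Nucleus_Size'] > threshold).sum())  # ~50 cells above the cutoff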
@@ -1006,6 +999,10 @@ results_display = pn.bind(update_threshold_and_display, quantile_slider)
 # Layout the components in a Panel app
 layout2 = results_display
 
+
+# In[91]:
+
+
 print("Number of cells before filtering :", df.shape[0])
 cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"
 # Delete small cells and objects w/high AF555 Signal (RBCs)
@@ -1113,6 +1110,10 @@ def calculate_quantiles(column, quantile):
     quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
     return quantiles
 
+
+# In[105]:
+
+
 quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)
 
 
@@ -1122,10 +1123,30 @@ quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99,
 # Layout the components in a Panel app
 #nucleus_size_graph = pn.Column(nucleus_size_line_graph)
 
+
+# In[106]:
+
+
+#df["CKs_Cytoplasm_Intensity_Average"].quantile(q=qs)
+
+
+# In[107]:
+
+
 len(intensities)
+if 'CKs_Cytoplasm_Intensity_Average' in intensities:
+    print(1)
+
+
+# In[108]:
+
 
 df
 
+
+# In[109]:
+
+
 def calculate_cytoplasm_quantiles(column, quantile):
     # Print the columns of the DataFrame
     print("DataFrame columns:", df.columns)
@@ -1143,9 +1164,14 @@ def create_cytoplasm_intensity_df(column, quantile):
     return pn.pane.DataFrame(output)
 
 # Bind the create_app function to the quantile slider
-cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column=
+cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column='CKs_Cytoplasm_Intensity_Average', quantile=quantile_slider.param.value)
 
 pn.Column(quantile_slider, cytoplasm_quantile_output_app)
+
+
+# In[110]:
+
+
 def calculate_cytoplasm_quantiles(column, quantile):
     quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
     return quantiles
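Both previously truncated `pn.bind` calls are completed in this commit with a concrete column plus the slider's `param.value`, so the quantile table recomputes as the slider moves. The same wiring in isolation, on a stand-in dataframe:

import numpy as np
import pandas as pd
import panel as pn

pn.extension()

df = pd.DataFrame({'CKs_Cytoplasm_Intensity_Average': np.random.rand(100)})
quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99,
                                         step=0.01, value=0.05)

def create_cytoplasm_intensity_df(column, quantile):
    # Lower / median / upper quantiles of the selected column.
    return pn.pane.DataFrame(df[column].quantile(q=[quantile, 0.50, 1 - quantile]).to_frame())

cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df,
                                        column='CKs_Cytoplasm_Intensity_Average',
                                        quantile=quantile_slider.param.value)
pn.Column(quantile_slider, cytoplasm_quantile_output_app)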
@@ -1159,12 +1185,15 @@ def create_cytoplasm_intensity_df(column, quantile):
 
 
 # Bind the create_app function to the quantile slider
-cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column=
+cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column='CKs_Cytoplasm_Intensity_Average', quantile = quantile_slider.param.value)
 pn.Column(quantile_slider,cytoplasm_quantile_output_app)
 
 
 # ## I.5. COLUMNS OF INTERESTS
 
+# In[111]:
+
+
 # Remove columns containing "DAPI"
 df = df[[x for x in df.columns.values if 'DAPI' not in x]]
 
@@ -1172,6 +1201,9 @@ print("Columns are now...")
 print([c for c in df.columns.values])
 
 
+# In[112]:
+
+
 # Create lists of full names and shortened names to use in plotting
 full_to_short_names, short_to_full_names = \
     shorten_feature_names(df.columns.values[~df.columns.isin(not_intensities)])
@@ -1179,6 +1211,9 @@ full_to_short_names, short_to_full_names = \
 short_to_full_names
 
 
+# In[113]:
+
+
 # Save this data to a metadata file
 filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")
 fh = open(filename, "w")
@@ -1189,6 +1224,10 @@ for k,v in full_to_short_names.items():
 fh.close()
 print("The full_to_short_column_names.csv file was created !")
 
+
+# In[114]:
+
+
 # Save this data to a metadata file
 filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")
 fh = open(filename, "w")
@@ -1202,11 +1241,18 @@ print("The short_to_full_column_names.csv file was created !")
 
 # ## I.6. EXPOSURE TIME
 
+# In[115]:
+
 
 #import the ashlar analysis file
 file_path = os.path.join(metadata_dir, 'combined_metadata.csv')
 ashlar_analysis = pd.read_csv(file_path)
 ashlar_analysis
+
+
+# In[116]:
+
+
 # Extracting and renaming columns
 new_df = ashlar_analysis[['Name', 'Cycle', 'ChannelIndex', 'ExposureTime']].copy()
 new_df.rename(columns={
@@ -1225,6 +1271,10 @@ new_df.to_csv('Ashlar_Exposure_Time.csv', index=False)
 # Print the new dataframe
 print(new_df)
 
+
+# In[117]:
+
+
 # Here, we want to end up with a data structure that incorporates metadata on each intensity marker column used in our big dataframe in an easy-to-use format.
 # This is going to include the full name of the intensity marker columns in the big data frame,
 # the corresponding round and channel,
@@ -1254,21 +1304,41 @@ else:
     print("\nNo null values detected.")
 
 
+# In[118]:
+
+
 if len(exp_df['Target']) > len(exp_df['Target'].unique()):
     print("One or more non-unique Target values in exp_df. Currently not supported.")
     exp_df = exp_df.drop_duplicates(subset = 'Target').reindex()
 
+
+# In[119]:
+
+
 # sort exp_df by the values in the 'Target' column in ascending order and then retrieve the first few rows of the sorted df
 exp_df.sort_values(by = ['Target']).head()
 
+
+# In[120]:
+
+
 # Create lowercase version of target
 exp_df['target_lower'] = exp_df['Target'].str.lower()
 exp_df.head()
 
+
+# In[121]:
+
+
 # Create df that contains marker intensity columns in our df that aren't in not_intensities
 intensities = pd.DataFrame({'full_column':df.columns.values[~df.columns.isin(not_intensities)]})
 
 intensities
+
+
+# In[122]:
+
+
 # Extract the marker information from the `full_column`, which corresponds to full column in big dataframe
 # Use regular expressions (regex) to isolate the part of the field that begins (^) with an alphanumeric value (W), and ends with an underscore (_)
 # '$' is end of line
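The regex in the context below, `r'([^\W_]+)'`, captures the leading run of word characters up to (but excluding) the first underscore, i.e. the marker prefix of each column name. For example:

import pandas as pd

intensities = pd.DataFrame({'full_column': ['CD45_Cell_Intensity_Average',
                                            'CKs_Cytoplasm_Intensity_Average']})
intensities['marker'] = intensities['full_column'].str.extract(r'([^\W_]+)')
print(intensities['marker'].tolist())  # ['CD45', 'CKs']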
@@ -1277,10 +1347,20 @@ intensities['marker'] = intensities['full_column'].str.extract(r'([^\W_]+)')
 intensities['marker_lower'] = intensities['marker'].str.lower()
 
 intensities
+
+
+# In[123]:
+
+
 # Subset the intensities df to exclude any column pertaining to DAPI
 intensities = intensities.loc[intensities['marker_lower'] != 'dapi']
 
 intensities.head()
+
+
+# In[124]:
+
+
 # Merge the intensities andexp_df together to create metadata
 metadata = pd.merge(exp_df, intensities, how = 'left', left_on = 'target_lower',right_on = 'marker_lower')
 metadata = metadata.drop(columns = ['marker_lower'])
@@ -1290,14 +1370,27 @@ metadata = metadata.dropna()
 # target_lower is Target in small caps
 # marker is the extracted first component of the full column in segmentation data, with corresponding capitalization
 metadata
+
+
+# In[125]:
+
+
 # Add a column to signify marker target localisation.
 # Use a lambda to determine segmented location of intensity marker column and update metadata accordingly
 # Using the add_metadata_location() function in my_modules.py
 metadata['localisation'] = metadata.apply(
     lambda row: add_metadata_location(row), axis = 1)
 
+
+# In[126]:
+
+
 mlid = metadata
 
+
+# In[127]:
+
+
 # Save this data structure to the metadata folder
 # don't want to add color in because that's better off treating color the same for round, channel, and sample
 filename = "marker_intensity_metadata.csv"
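`add_metadata_location()` is defined in `my_modules.py` and not shown in this diff; a hypothetical stand-in illustrating the row-wise `apply` pattern used above:

import pandas as pd

def add_metadata_location(row):
    # Hypothetical: infer the compartment from the full column name.
    for loc in ('Cell', 'Cytoplasm', 'Nucleus'):
        if f'_{loc}_' in row['full_column']:
            return loc
    return 'unknown'

metadata = pd.DataFrame({'full_column': ['CD45_Cell_Intensity_Average']})
metadata['localisation'] = metadata.apply(lambda row: add_metadata_location(row), axis=1)
print(metadata)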
@@ -1336,6 +1429,10 @@ custom_colors_values = sb.palplot(sb.color_palette([custom_colors.get(ch, 'blue'
 print("Unique channels are:", metadata.Channel.unique())
 sb.palplot(sb.color_palette(channel_color_values))
 
+
+# In[131]:
+
+
 # Function to create a palette plot with custom colors
 def create_palette_plot():
     # Get unique channels
@@ -1398,6 +1495,9 @@ app_palette_plot = create_palette_plot(custom_colors)
 #app_palette_plot.servable()
 
 
+# In[133]:
+
+
 # Store in a dictionary
 channel_color_dict = dict(zip(metadata.Channel.unique(), channel_color_values))
 channel_color_dict
@@ -1406,6 +1506,10 @@ for k,v in channel_color_dict.items():
 
 channel_color_dict
 
+
+# In[134]:
+
+
 color_df_channel = color_dict_to_df(channel_color_dict, "Channel")
 
 # Save to file in metadatadirectory
@@ -1415,6 +1519,10 @@ color_df_channel.to_csv(filename, index = False)
 
 color_df_channel
 
+
+# In[135]:
+
+
 # Legend of channel info only
 g = plt.figure(figsize = (1,1)).add_subplot(111)
 g.axis('off')
@@ -1448,6 +1556,10 @@ sb.palplot(sb.color_palette(round_color_values))
 
 ## TO-DO: write what these parameters mean
 
+
+# In[137]:
+
+
 # Store in a dictionary
 round_color_dict = dict(zip(metadata.Round.unique(), round_color_values))
 
@@ -1456,6 +1568,10 @@ for k,v in round_color_dict.items():
 
 round_color_dict
 
+
+# In[138]:
+
+
 color_df_round = color_dict_to_df(round_color_dict, "Round")
 
 # Save to file in metadatadirectory
@@ -1485,6 +1601,9 @@ plt.savefig(filename, bbox_inches = 'tight')
 
 # ### I.7.3. SAMPLES COLORS
 
+# In[140]:
+
+
 # we want colors that are neither sequential nor categorical.
 # Categorical would be ideal if we could generate an arbitrary number of colors, but I do not think that we can.
 # Hense, we will choose `n` colors from a continuous palette. First we will generate the right number of colors. Later, we will assign TMA samples to gray.
@@ -1496,10 +1615,18 @@ color_values = sb.color_palette("husl",n_colors = len(ls_samples))#'HLS'
 # Display those unique colors
 sb.palplot(sb.color_palette(color_values))
 
+
+# In[141]:
+
+
 TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
 TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = "gray")
 sb.palplot(sb.color_palette(TMA_color_values))
 
+
+# In[142]:
+
+
 # Store in a dictionary
 color_dict = dict()
 color_dict = dict(zip(df.Sample_ID.unique(), color_values))
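`sb.color_palette("husl", n_colors=len(ls_samples))` returns evenly spaced hues, one per sample, which `dict(zip(...))` then pins to sample IDs; TMA samples are later reassigned to grays. A small sketch using the hard-coded sample list from this commit:

import seaborn as sb

ls_samples = ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']
color_values = sb.color_palette('husl', n_colors=len(ls_samples))
color_dict = dict(zip(ls_samples, color_values))  # one RGB tuple per sample
print(color_dict['TMA.csv'])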
@@ -1615,34 +1742,16 @@ variable_widget = pn.widgets.Select(name="Target", value="Exp", options=list(met
 window_widget = pn.widgets.IntSlider(name="window", value=30, start=1, end=60)
 sigma_widget = pn.widgets.IntSlider(name="sigma", value=10, start=0, end=20)
 
-# Function to save files
-def save_files(event):
-    for sample in ls_samples:
-        sample_id = sample.split('.csv')[0]
-        filename = os.path.join(output_data_dir, sample_id + "_" + step_suffix + ".csv")
-
-        df_save = df.loc[df['Sample_ID'] == sample, :]
-        if os.path.exists(filename):
-            df_save.to_csv(filename, index=True, index_label='ID', mode='w') # Overwrite by default
-            print(f"File {filename} was overwritten!")
-        else:
-            df_save.to_csv(filename, index=True, index_label='ID') # Save normally if the file doesn't exist
-            print(f"File {filename} was created and saved!")
-
-# Button to download files
-download_button = pn.widgets.Button(name='Download Files', button_type='primary')
-download_button.on_click(save_files)
-
 app = pn.template.GoldenTemplate(
     site="Cyc-IF",
     title="Quality Control",
     main=[
         pn.Tabs(
             ("Dataframes", pn.Column(
-                pn.Row(csv_files_button,pn.bind(handle_click, csv_files_button.param.clicks)
+                pn.Row(csv_files_button,pn.bind(handle_click, csv_files_button.param.clicks)),
                 pn.pane.Markdown("### The Dataframe uploaded:"), pn.pane.DataFrame(intial_dataframe),
                 #pn.pane.Markdown("### The Exposure time DataFrame is :"), pn.pane.DataFrame(exp_df.head()),
-                pn.pane.Markdown("### The DataFrame after merging CycIF data x metadata :"), pn.pane.DataFrame(merged_dataframe.head(
-            )),
+                pn.pane.Markdown("### The DataFrame after merging CycIF data x metadata :"), pn.pane.DataFrame(merged_dataframe.head()),
+            )),
             ("Quality Control", pn.Column(
                 quality_check(quality_control_df, not_intensities)
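A stripped-down version of the template/tab wiring assembled above, with placeholder panes standing in for the app's dataframes and plots:

import panel as pn

pn.extension()

app = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Quality Control",
    main=[
        pn.Tabs(
            ("Dataframes", pn.Column(pn.pane.Markdown("### The Dataframe uploaded:"))),
            ("Plots", pn.Column(pn.pane.Markdown("### Intensity Average Plot:"))),
        )
    ],
)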
@@ -1656,17 +1765,19 @@ app = pn.template.GoldenTemplate(
             )),
             ("Plots", pn.Column(
                 #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(nucleus_size_line_graph_with_histogram, num_of_cell_removal),
-                pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(plot1,layout2),
+                #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(plot1,layout2),
                 #pn.pane.Markdown("### Nucleus Distribution Plot:"), pn.Column(nucleus_size_plot, nucleus_size_graph),
                 pn.pane.Markdown(" ### Intensity Average Plot:"), pn.Row(selected_marker_plot,num_of_cell_removal_intensity ),
                 #pn.Column(pn.Column(column_dropdown, generate_plot_button), quantile_slider, plot),
                 #pn.pane.Markdown("### Cytoplasm Intensity Plot:"), cytoplasm_intensity_plot,
                 #pn.pane.Markdown("### AF555_Cell_Intensity_Average:"), quantile_output_app,
-                #pn.pane.Markdown("### Distribution of AF555_Cell_Intensity_Average with Quantiles:"), quantile_intensity_plot)
-                pn.Column(download_button),
+                #pn.pane.Markdown("### Distribution of AF555_Cell_Intensity_Average with Quantiles:"), quantile_intensity_plot)
             )),
 
         ),
     ])
 
-app.servable()
+app.servable()
+
+if __name__ == "__main__":
+    pn.serve(app, port=5007)
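With `app.servable()` the script is picked up by `panel serve Quality_Control.py`, while the new `__main__` guard lets the same file be launched directly with `python Quality_Control.py`; the guard only fires on direct execution, so the two entry points do not conflict. A minimal sketch of that dual setup:

import panel as pn

app = pn.Column("Quality Control")  # stand-in for the GoldenTemplate above
app.servable()                      # used by `panel serve`

if __name__ == "__main__":
    pn.serve(app, port=5007)        # used by direct `python` execution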