foz committed
Commit 1d89ca0
Parents: d4c993e, 29f7eb3

Fix README

app.py ADDED
@@ -0,0 +1,81 @@
import gradio as gr
from tifffile import imread
from PIL import Image
from path_analysis.analyse import analyse_paths
import numpy as np


# Function to preview the imported image
def preview_image(file1):
    if file1:
        print('Uploading image', file1.name)
        im = imread(file1.name)
        print(im.ndim, im.shape)
        if im.ndim > 2:
            return Image.fromarray(np.max(im, axis=0))
        else:
            return Image.fromarray(im)
    else:
        return None


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Inputs for cell ID, image, and path
            cellid_input = gr.Textbox(label="Cell ID", placeholder="Image_1")
            image_input = gr.File(label="Input foci image")
            image_preview = gr.Image(label="Max projection of foci image")
            image_input.change(fn=preview_image, inputs=image_input, outputs=image_preview)
            path_input = gr.File(label="SNT traces file")

            # Additional options wrapped in an accordion for better UI experience
            with gr.Accordion("Additional options ..."):
                sphere_radius = gr.Number(label="Trace sphere radius (um)", value=0.1984125, interactive=True)
                peak_threshold = gr.Number(label="Peak relative threshold", value=0.4, interactive=True)
                # Resolutions for the xy and z axes
                with gr.Row():
                    xy_res = gr.Number(label='xy resolution (um)', value=0.0396825, interactive=True)
                    z_res = gr.Number(label='z resolution (um)', value=0.0909184, interactive=True)

                threshold_type = gr.Radio(["per-trace", "per-cell"], label="Threshold-type", value="per-trace", interactive=True)
                use_corrected_positions = gr.Checkbox(label="Correct foci position measurements", value=True, interactive=True)
                screening_distance = gr.Number(label='Screening distance (voxels)', value=10, interactive=True)

        # The output column showing the results of processing
        with gr.Column():
            trace_output = gr.Image(label="Overlayed paths")
            image_output = gr.Gallery(label="Traced paths")
            plot_output = gr.Plot(label="Foci intensity traces")
            data_output = gr.DataFrame(label="Detected peak data")  # , "Peak 1 pos", "Peak 1 int"])
            data_file_output = gr.File(label="Output data file (.csv)")

    def process(cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions, screening_distance):

        config = {'sphere_radius': sphere_radius,
                  'peak_threshold': peak_threshold,
                  'xy_res': xy_res,
                  'z_res': z_res,
                  'threshold_type': threshold_type,
                  'use_corrected_positions': use_corrected_positions,
                  'screening_distance': screening_distance,
                  }

        paths, traces, fig, extracted_peaks = analyse_paths(cellid_input, image_input.name, path_input.name, config)
        extracted_peaks.to_csv('output.csv')
        print('extracted', extracted_peaks)
        return paths, [Image.fromarray(im) for im in traces], fig, extracted_peaks, 'output.csv'

    with gr.Row():
        greet_btn = gr.Button("Process")
        greet_btn.click(fn=process, inputs=[cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions, screening_distance], outputs=[trace_output, image_output, plot_output, data_output, data_file_output], api_name="process")


if __name__ == "__main__":
    demo.launch()

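
The Gradio callback above is a thin wrapper around analyse_paths, so the analysis can also be run headlessly. A minimal sketch, assuming a local TIFF stack foci.tif and an SNT traces file traces.xml (both file names are placeholders, not files in this commit); the config values mirror the UI defaults:

from path_analysis.analyse import analyse_paths

config = {'sphere_radius': 0.1984125, 'peak_threshold': 0.4,
          'xy_res': 0.0396825, 'z_res': 0.0909184,
          'threshold_type': 'per-trace',
          'use_corrected_positions': True,
          'screening_distance': 10}

# Returns the path-overlay image, per-trace visualisations, the intensity figure and a peaks DataFrame
overlay, trace_images, fig, peaks = analyse_paths('Image_1', 'foci.tif', 'traces.xml', config)
peaks.to_csv('output.csv')
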
path_analysis/__init__.py ADDED
File without changes
path_analysis/analyse.py ADDED
@@ -0,0 +1,416 @@
import lxml.etree as ET
import gzip
import tifffile
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
from itertools import cycle
from .data_preprocess import analyse_traces
import math
import scipy.linalg as la


def get_paths_from_traces_file(traces_file):
    """
    Parses the specified traces file and extracts paths and their lengths.

    Args:
        traces_file (str): Path to the XML traces file.

    Returns:
        tuple: A tuple containing a list of paths (each path is a list of tuples representing points)
               and a list of corresponding path lengths.
    """
    tree = ET.parse(traces_file)
    root = tree.getroot()
    all_paths = []
    path_lengths = []
    for path in root.findall('path'):
        length = path.get('reallength')
        path_points = []
        for point in path:
            path_points.append((int(point.get('x')), int(point.get('y')), int(point.get('z'))))
        all_paths.append(path_points)
        path_lengths.append(float(length))
    return all_paths, path_lengths


def calculate_path_length_partials(point_list, voxel_size=(1, 1, 1)):
    """
    Calculate the partial path length of a series of points.

    Args:
        point_list (list of tuple): List of points, each represented as a tuple of coordinates (x, y, z).
        voxel_size (tuple, optional): Size of the voxel in each dimension (x, y, z). Defaults to (1, 1, 1).

    Returns:
        numpy.ndarray: Array of cumulative partial path lengths at each point.
    """
    # Simple calculation
    section_lengths = [0.0]
    s = np.array(voxel_size)
    for i in range(len(point_list)-1):
        # Euclidean distance between successive points
        section_lengths.append(la.norm(s * (np.array(point_list[i+1]) - np.array(point_list[i]))))
    return np.cumsum(section_lengths)


def visualise_ordering(points_list, dim, wr=5, wc=5):
    """
    Visualize the ordering of points in an image.

    Args:
        points_list (list): List of points to be visualized.
        dim (tuple): Dimensions of the image (rows, columns, channels).
        wr (int, optional): Width of the region to visualize around the point in the row direction. Defaults to 5.
        wc (int, optional): Width of the region to visualize around the point in the column direction. Defaults to 5.

    Returns:
        np.array: An image array with visualized points.
    """
    # Visualizes the ordering of the points in the list on a blank image.
    rdim, cdim, _ = dim
    vis = np.zeros((rdim, cdim, 3), dtype=np.uint8)

    def get_col(i):
        r = int(255 * i/len(points_list))
        g = 255 - r
        return r, g, 0

    for n, p in enumerate(points_list):
        c, r, _ = map(int, p)
        vis[max(0, r-wr):min(rdim, r+wr+1), max(0, c-wc):min(cdim, c+wc+1)] = get_col(n)

    return vis


# A color map for paths
col_map = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255),
           (255, 127, 0), (255, 0, 127), (127, 255, 0), (0, 255, 127), (127, 0, 255), (0, 127, 255)]


def draw_paths(all_paths, foci_stack, foci_index=None, r=3, screened_foci_data=None):
    """
    Draws paths on the provided image stack and overlays markers for the foci.

    Args:
        all_paths (list): List of paths where each path is a list of points.
        foci_stack (np.array): 3D numpy array representing the image stack.
        foci_index (list, optional): List of lists of focus indices (along each path). Defaults to None.
        r (int, optional): Radius for the ellipse or line drawing around the focus. Defaults to 3.
        screened_foci_data (list, optional): List of RemovedPeakData for screened foci.

    Returns:
        PIL.Image.Image: An image with the drawn paths.
    """
    im = np.max(foci_stack, axis=0)
    im = (im/np.max(im)*255).astype(np.uint8)
    im = np.dstack((im,)*3)
    im = Image.fromarray(im)
    draw = ImageDraw.Draw(im)
    for i, (p, col) in enumerate(zip(all_paths, cycle(col_map))):
        draw.line([(u[0], u[1]) for u in p], fill=col)
        draw.text((p[0][0], p[0][1]), str(i+1), fill=col)

    if screened_foci_data is not None:
        for i, removed_peaks in enumerate(screened_foci_data):
            for p in removed_peaks:
                u = all_paths[i][p.idx]
                v = all_paths[p.screening_peak[0]][p.screening_peak[1]]
                draw.line((int(u[0]), int(u[1]), int(v[0]), int(v[1])), fill=(127, 127, 127), width=2)

    if foci_index is not None:
        for i, (idx, p, col) in enumerate(zip(foci_index, all_paths, cycle(col_map))):
            if len(idx):
                for j in idx:
                    draw.line((int(p[j][0]-r), int(p[j][1]), int(p[j][0]+r), int(p[j][1])), fill=col, width=2)
                    draw.line((int(p[j][0]), int(p[j][1]-r), int(p[j][0]), int(p[j][1]+r)), fill=col, width=2)
    return im


def measure_from_mask(mask, measure_stack):
    """
    Compute the sum of measure_stack values where the mask is equal to 1.

    Args:
        mask (numpy.ndarray): Binary mask where the measurement should be applied.
        measure_stack (numpy.ndarray): Stack of measurements.

    Returns:
        measure_stack.dtype: Sum of measure_stack values where the mask is 1.
    """
    return np.sum(mask * measure_stack)


# Max of measure_stack over region where mask==1
def max_from_mask(mask, measure_stack):
    """
    Compute the maximum of measure_stack values where the mask is equal to 1.

    Args:
        mask (numpy.ndarray): Binary mask where the measurement should be applied.
        measure_stack (numpy.ndarray): Stack of measurements.

    Returns:
        measure_stack.dtype: Maximum value of measure_stack where the mask is 1.
    """
    return np.max(mask * measure_stack)


def make_mask_s(p, melem, measure_stack):
    """
    Translate a mask to point p, ensuring correct treatment near the edges of the measure_stack.

    Args:
        p (tuple): Target point (r, c, z).
        melem (numpy.ndarray): Structuring element for the mask.
        measure_stack (numpy.ndarray): Stack of measurements.

    Returns:
        tuple: A tuple containing the translated mask and a section of the measure_stack.
    """
    R = [u//2 for u in melem.shape]

    r, c, z = p

    mask = np.zeros(melem.shape)

    m_data = np.zeros(melem.shape)
    s = measure_stack.shape
    o_1, o_2, o_3 = max(R[0]-r, 0), max(R[1]-c, 0), max(R[2]-z, 0)
    e_1, e_2, e_3 = min(R[0]-r+s[0], 2*R[0]+1), min(R[1]-c+s[1], 2*R[1]+1), min(R[2]-z+s[2], 2*R[2]+1)
    m_data[o_1:e_1, o_2:e_2, o_3:e_3] = measure_stack[max(r-R[0], 0):min(r+R[0]+1, s[0]), max(c-R[1], 0):min(c+R[1]+1, s[1]), max(z-R[2], 0):min(z+R[2]+1, s[2])]
    mask[o_1:e_1, o_2:e_2, o_3:e_3] = melem[o_1:e_1, o_2:e_2, o_3:e_3]

    return mask, m_data


def measure_at_point(p, melem, measure_stack, op='mean'):
    """
    Measure the mean or max value of measure_stack around a specific point using a structuring element.

    Args:
        p (tuple): Target point (r, c, z).
        melem (numpy.ndarray): Structuring element for the mask.
        measure_stack (numpy.ndarray): Stack of measurements.
        op (str, optional): Operation to be applied; either 'mean' or 'max'. Default is 'mean'.

    Returns:
        float: Measured value based on the specified operation.
    """
    p = map(int, p)
    if op == 'mean':
        mask, m_data = make_mask_s(p, melem, measure_stack)
        melem_size = np.sum(mask)
        return float(measure_from_mask(mask, m_data) / melem_size)
    else:
        mask, m_data = make_mask_s(p, melem, measure_stack)
        return float(max_from_mask(mask, m_data))


# Generate spherical region
def make_sphere(R=5, z_scale_ratio=2.3):
    """
    Generate a binary representation of a sphere in 3D space.

    Args:
        R (int, optional): Radius of the sphere. Default is 5. Centred on the centre of the middle voxel.
            Includes all voxels whose centre is within R of the middle voxel.
        z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.

    Returns:
        numpy.ndarray: Binary representation of the sphere.
    """
    R_z = int(math.ceil(R/z_scale_ratio))
    x, y, z = np.ogrid[-R:R+1, -R:R+1, -R_z:R_z+1]
    sphere = x**2 + y**2 + (z_scale_ratio * z)**2 <= R**2
    return sphere


# Measure the values of measure_stack at each of the points of points_list in turn.
# Measurement is the mean / max (specified by op) on the spherical region about each point
def measure_all_with_sphere(points_list, measure_stack, op='mean', R=5, z_scale_ratio=2.3):
    """
    Measure the values of measure_stack at each point in a list using a spherical region.

    Args:
        points_list (list): List of points (r, c, z) to be measured.
        measure_stack (numpy.ndarray): Stack of measurements.
        op (str, optional): Operation to be applied; either 'mean' or 'max'. Default is 'mean'.
        R (int, optional): Radius of the sphere. Default is 5.
        z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.

    Returns:
        list: List of measured values for each point.
    """
    melem = make_sphere(R, z_scale_ratio)
    measure_func = lambda p: measure_at_point(p, melem, measure_stack, op)
    return list(map(measure_func, points_list))


# Measure fluorescence levels along ordered skeleton
def measure_chrom2(path, intensity, config):
    """
    Measure fluorescence levels along an ordered skeleton.

    Args:
        path (list): List of ordered path points (r, c, z).
        intensity (numpy.ndarray): 3D fluorescence data.
        config (dict): Configuration dictionary containing 'z_res', 'xy_res', and 'sphere_radius' values.

    Returns:
        tuple: A tuple containing the visualization, mean measurements, and max measurements along the path.
    """
    # Calculate size of spheroid used for measurement
    scale_ratio = config['z_res']/config['xy_res']
    sphere_xy_radius = int(math.ceil(config['sphere_radius']/config['xy_res']))

    vis = visualise_ordering(path, dim=intensity.shape, wr=sphere_xy_radius, wc=sphere_xy_radius)

    measurements = measure_all_with_sphere(path, intensity, op='mean', R=sphere_xy_radius, z_scale_ratio=scale_ratio)
    measurements_max = measure_all_with_sphere(path, intensity, op='max', R=sphere_xy_radius, z_scale_ratio=scale_ratio)

    return vis, measurements, measurements_max


def extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config):
    """
    Extract peak information from given traces and compile them into a DataFrame.

    Args:
        - cell_id (int or str): Identifier for the cell being analyzed.
        - all_paths (list of lists): Contains ordered path points for multiple paths.
        - path_lengths (list of floats): List containing lengths of each path in all_paths.
        - measured_traces (list of lists): Contains fluorescence measurement values along the paths.
        - config (dict): Configuration dictionary containing:
            - 'peak_threshold': Threshold value to determine a peak in the trace.
            - 'sphere_radius': Radius of the sphere used in fluorescence measurement.

    Returns:
        - pd.DataFrame: DataFrame containing peak information for each path.
        - list of lists: Absolute intensities of the detected foci.
        - list of lists: Index positions of the detected foci.
        - list of lists: RemovedPeakData for foci screened out by a nearby brighter peak.
        - list of lists: Absolute focus intensity threshold for each trace.
        - list of numpy.ndarray: For each trace, distances of each point from start of trace in microns.
    """
    n_paths = len(all_paths)

    data = []
    foci_absolute_intensity, foci_position, foci_position_index, screened_foci_data, trace_median_intensities, trace_thresholds = analyse_traces(all_paths, path_lengths, measured_traces, config)

    # Normalize foci intensities (for quantification) using trace medians as estimates of background
    foci_intensities = []
    for path_foci_abs_int, tmi in zip(foci_absolute_intensity, trace_median_intensities):
        foci_intensities.extend(list(path_foci_abs_int - tmi))

    # Divide all foci intensities by the mean within the cell
    mean_intensity = np.mean(foci_intensities)
    trace_positions = []

    for i in range(n_paths):

        # Calculate real (Euclidean) distance of each point along the traced path
        pl = calculate_path_length_partials(all_paths[i], (config['xy_res'], config['xy_res'], config['z_res']))

        path_data = {'Cell_ID': cell_id,
                     'Trace': i+1,
                     'SNT_trace_length(um)': path_lengths[i],
                     'Measured_trace_length(um)': pl[-1],
                     'Trace_median_intensity': trace_median_intensities[i],
                     'Detection_sphere_radius(um)': config['sphere_radius'],
                     'Screening_distance(voxels)': config['screening_distance'],
                     'Foci_ID_threshold': config['peak_threshold'],
                     'Trace_foci_number': len(foci_position_index[i])}
        for j, (idx, u, v) in enumerate(zip(foci_position_index[i], foci_position[i], foci_absolute_intensity[i])):
            if config['use_corrected_positions']:
                # Use the calculated position along the traced path
                path_data[f'Foci_{j+1}_position(um)'] = pl[idx]
            else:
                # Use the measured trace length (from SNT), and assume all steps of path are approximately the same length
                path_data[f'Foci_{j+1}_position(um)'] = u
            # The original measured intensity (mean in spheroid around detected peak)
            path_data[f'Foci_{j+1}_absolute_intensity'] = v
            # Measure relative intensity by removing per-trace background and dividing by cell total
            path_data[f'Foci_{j+1}_relative_intensity'] = (v - trace_median_intensities[i])/mean_intensity
        data.append(path_data)
        trace_positions.append(pl)
    return pd.DataFrame(data), foci_absolute_intensity, foci_position_index, screened_foci_data, trace_thresholds, trace_positions


def analyse_paths(cell_id,
                  foci_file,
                  traces_file,
                  config
                  ):
    """
    Analyzes paths for the given cell ID using provided foci and trace files.

    Args:
        cell_id (int/str): Identifier for the cell.
        foci_file (str): Path to the foci image file.
        traces_file (str): Path to the XML traces file.
        config (dict): Configuration dictionary containing necessary parameters such as resolutions and thresholds.

    Returns:
        tuple: A tuple containing an overlay image of the traces, visualization images for each trace,
               a figure with plotted measurements, and a dataframe with extracted peaks.
    """
    # Read stack
    foci_stack = tifffile.imread(foci_file)

    # If 2D add additional (z) dimension
    if foci_stack.ndim == 2:
        foci_stack = foci_stack[None, :, :]

    all_paths, path_lengths = get_paths_from_traces_file(traces_file)

    all_trace_vis = []  # Per-path visualizations
    all_m = []          # Per-path measured intensities
    for p in all_paths:
        # Measure intensity along path - transpose the stack ZYX -> XYZ
        vis, m, _ = measure_chrom2(p, foci_stack.transpose(2, 1, 0), config)
        all_trace_vis.append(vis)
        all_m.append(m)

    # Extract all data from paths and traces
    extracted_peaks, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, all_m, config)

    # Plot per-path measured intensities and indicate foci
    n_cols = 2
    n_rows = (len(all_paths)+n_cols-1)//n_cols
    fig, ax = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 3*n_rows))
    ax = ax.flatten()

    for i, m in enumerate(all_m):
        ax[i].set_title(f'Trace {i+1}')
        ax[i].plot(trace_positions[i], m)
        if len(foci_pos_index[i]):
            # Plot detected foci
            ax[i].plot(trace_positions[i][foci_pos_index[i]], np.array(m)[foci_pos_index[i]], 'rx')

        if len(screened_foci_data[i]):
            # Indicate screened foci by gray circles on plots
            screened_foci_pos_index = [u.idx for u in screened_foci_data[i]]
            ax[i].plot(trace_positions[i][screened_foci_pos_index], np.array(m)[screened_foci_pos_index], color=(0.5, 0.5, 0.5), marker='o', linestyle='None')

        # Show per-trace intensity thresholds with red dotted lines
        if trace_thresholds[i] is not None:
            ax[i].axhline(trace_thresholds[i], c='r', ls=':')
        ax[i].set_xlabel('Distance from start (um)')
        ax[i].set_ylabel('Intensity')
    # Hide excess plots
    for i in range(len(all_m), n_cols*n_rows):
        ax[i].axis('off')

    plt.tight_layout()
    trace_overlay = draw_paths(all_paths, foci_stack, foci_index=foci_pos_index, screened_foci_data=screened_foci_data)

    return trace_overlay, all_trace_vis, fig, extracted_peaks

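
To illustrate the two geometric helpers defined above, a short sketch; the point coordinates and voxel sizes here are arbitrary example values, not values taken from this commit:

import numpy as np
from path_analysis.analyse import calculate_path_length_partials, make_sphere

# Cumulative Euclidean distance along a 3-point path with anisotropic voxels (xy = 0.04 um, z = 0.09 um)
pts = [(0, 0, 0), (1, 0, 0), (1, 1, 1)]
print(calculate_path_length_partials(pts, voxel_size=(0.04, 0.04, 0.09)))
# -> approximately [0.0, 0.040, 0.138]; the last entry is the measured trace length

# Spheroidal structuring element used by measure_all_with_sphere;
# its z extent is reduced by the z/xy anisotropy ratio
sphere = make_sphere(R=5, z_scale_ratio=2.3)
print(sphere.shape)  # (11, 11, 7)
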
path_analysis/data_preprocess.py ADDED
@@ -0,0 +1,395 @@
from dataclasses import dataclass
import numpy as np
import scipy.linalg as la
from scipy.signal import find_peaks
from math import ceil


def thin_peaks(peak_list, dmin=10, voxel_size=(1, 1, 1), return_larger_peaks=False):
    """
    Remove peaks within a specified distance of each other, retaining the peak with the highest intensity.

    Args:
    - peak_list (list of PeakData): Each element contains:
        - pos (list of float): 3D coordinates of the peak.
        - intensity (float): The intensity value of the peak.
        - key (tuple): A unique identifier or index for the peak (#trace, #peak)
    - dmin (float, optional): Minimum distance between peaks. Peaks closer than this threshold will be thinned. Defaults to 10.
    - voxel_size (tuple, optional): Voxel size in each dimension (x, y, z), used to scale distances. Defaults to (1, 1, 1).
    - return_larger_peaks (bool, optional): Also return the larger peak responsible for each removal.

    Returns:
    - list of tuples: A list containing keys of the removed peaks.
    If return_larger_peaks is True, additionally:
    - list of tuples: A list containing the keys of the larger peak causing each peak to be removed.

    Notes:
    - The function uses the L2 norm (Euclidean distance) to compute the distance between peaks.
    - When two peaks are within `dmin` distance, the peak with the lower intensity is removed.
    """
    removed_peaks = []
    removed_larger_peaks = []
    for i in range(len(peak_list)):
        if peak_list[i].key in removed_peaks:
            continue
        for j in range(len(peak_list)):
            if i == j:
                continue
            if peak_list[j].key in removed_peaks:
                continue
            d = (np.array(peak_list[i].pos) - np.array(peak_list[j].pos))*np.array(voxel_size)
            d = la.norm(d)
            if d < dmin:
                hi = peak_list[i].intensity
                hj = peak_list[j].intensity
                if hi < hj:
                    removed_peaks.append(peak_list[i].key)
                    removed_larger_peaks.append(peak_list[j].key)
                    break
                else:
                    removed_peaks.append(peak_list[j].key)
                    removed_larger_peaks.append(peak_list[i].key)

    if return_larger_peaks:
        return removed_peaks, removed_larger_peaks
    else:
        return removed_peaks


@dataclass
class CellData(object):
    """Represents data related to a single cell.

    Attributes:
        pathdata_list (list): A list of PathData objects representing the various paths associated with the cell.
    """
    pathdata_list: list


@dataclass
class RemovedPeakData(object):
    """Represents data related to a removed peak.

    Attributes:
        idx (int): Index of peak along path
        screening_peak (tuple): (path_idx, position along path) for screening peak
    """
    idx: int
    screening_peak: tuple


@dataclass
class PathData(object):
    """Represents data related to a specific path in the cell.

    This dataclass encapsulates information about the peaks,
    the defining points, the fluorescence values, and the path length of a specific path.

    Attributes:
        peaks (list): List of peaks in the path (indices of positions in points, o_intensity).
        removed_peaks (list): List of peaks in the path which have been removed because of a nearby larger peak.
        points (list): List of points defining the path.
        o_intensity (list): List of (unnormalized) fluorescence intensity values along the path.
        SC_length (float): Length of the path.
    """
    peaks: list
    removed_peaks: list
    points: list
    o_intensity: list
    SC_length: float


@dataclass
class PeakData(object):
    pos: tuple
    intensity: float
    key: tuple


def find_peaks2(v, distance=5, prominence=0.5):
    """
    Find peaks in a 1D array with extended boundary handling.

    The function pads the input array at both ends to handle boundary peaks. It then identifies peaks in the extended array
    and maps them back to the original input array.

    Args:
    - v (numpy.ndarray): 1D input array in which to find peaks.
    - distance (int, optional): Minimum number of array elements that separate two peaks. Defaults to 5.
    - prominence (float, optional): Minimum prominence required for a peak to be identified. Defaults to 0.5.

    Returns:
    - list of int: List containing the indices of the identified peaks in the original input array.
    - dict: Information about the properties of the identified peaks (as returned by scipy.signal.find_peaks).
    """
    pad = int(ceil(distance))+1
    v_ext = np.concatenate([np.ones((pad,), dtype=v.dtype)*np.min(v), v, np.ones((pad,), dtype=v.dtype)*np.min(v)])

    assert(len(v_ext) == len(v)+2*pad)
    peaks, _ = find_peaks(v_ext, distance=distance, prominence=prominence)
    peaks = peaks - pad
    n_peaks = []
    for i in peaks:
        if 0 <= i < len(v):
            n_peaks.append(i)
        else:
            raise Exception
    return n_peaks, _


def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence, dmin=10):
    """
    Process traces of cells to extract peak information and organize the data.

    The function normalizes fluorescence data, finds peaks, refines peak information,
    removes unwanted peaks that might be due to close proximity of bright peaks from
    other paths, and organizes all the information into a structured data format.

    Args:
        all_paths (list of list of tuples): A list containing paths, where each path is
                                            represented as a list of 3D coordinate tuples.
        path_lengths (list of float): List of path lengths corresponding to the provided paths.
        measured_trace_fluorescence (list of list of float): A list containing fluorescence
                                                             data corresponding to each path point.
        dmin (float): Distance below which brighter peaks screen less bright ones.

    Returns:
        CellData: An object containing organized peak and path data for a given cell.

    Note:
        - The function assumes that each path and its corresponding length and fluorescence data
          are positioned at the same index in their respective lists.
    """
    cell_peaks = []

    for points, o_intensity in zip(all_paths, measured_trace_fluorescence):

        # For peak determination normalize each trace to have mean zero and s.d. 1
        intensity_normalized = (o_intensity - np.mean(o_intensity))/np.std(o_intensity)

        # Find peaks - these will be further refined later
        p, _ = find_peaks2(intensity_normalized, distance=5, prominence=0.5*np.std(intensity_normalized))
        peaks = np.array(p, dtype=np.int32)

        # Store peak data - using original values, not normalized ones
        peak_mean_heights = [o_intensity[u] for u in peaks]
        peak_points = [points[u] for u in peaks]

        cell_peaks.append((peaks, peak_points, peak_mean_heights))

    # Eliminate peaks which have another larger peak nearby (in 3D space, on any chromosome).
    # This aims to remove small peaks in the mean intensity generated when an SC passes close
    # to a bright peak on another SC - this is nearby in space, but brighter.

    to_thin = []
    for k in range(len(cell_peaks)):
        for u in range(len(cell_peaks[k][0])):
            to_thin.append(PeakData(pos=cell_peaks[k][1][u], intensity=cell_peaks[k][2][u], key=(k, u)))

    # Exclude any peak with a nearby brighter peak (on any SC)
    removed_peaks, removed_larger_peaks = thin_peaks(to_thin, return_larger_peaks=True, dmin=dmin)

    # Clean up and remove these peaks
    new_cell_peaks = []
    removed_cell_peaks = []
    removed_cell_peaks_larger = []
    for path_idx in range(len(cell_peaks)):
        path_retained_peaks = []
        path_removed_peaks = []
        path_peaks = cell_peaks[path_idx][0]

        for peak_idx in range(len(path_peaks)):
            if (path_idx, peak_idx) not in removed_peaks:
                path_retained_peaks.append(path_peaks[peak_idx])
            else:
                # What's the larger point?
                idx = removed_peaks.index((path_idx, peak_idx))
                larger_path, larger_idx = removed_larger_peaks[idx]
                path_removed_peaks.append(RemovedPeakData(idx=path_peaks[peak_idx], screening_peak=(larger_path, cell_peaks[larger_path][0][larger_idx])))

        new_cell_peaks.append(path_retained_peaks)
        removed_cell_peaks.append(path_removed_peaks)

    cell_peaks = new_cell_peaks
    pd_list = []

    # Save peak positions, absolute intensities, and length for each SC
    for k in range(len(all_paths)):

        points, o_intensity = all_paths[k], measured_trace_fluorescence[k]

        peaks = cell_peaks[k]
        removed_peaks = removed_cell_peaks[k]

        pd = PathData(peaks=peaks, removed_peaks=removed_peaks, points=points, o_intensity=o_intensity, SC_length=path_lengths[k])
        pd_list.append(pd)

    cd = CellData(pathdata_list=pd_list)

    return cd


alpha_max = 0.4


# Criterion used for identifying peak as a focus - normalized (with mean and s.d.)
# intensity levels being above 0.4 times maximum peak level
def focus_criterion(pos, v, alpha=alpha_max):
    """
    Identify and return positions where values in the array `v` exceed a certain threshold.

    The threshold is computed as `alpha` times the maximum value in `v`.

    Args:
    - pos (numpy.ndarray): Array of positions.
    - v (numpy.ndarray): 1D array of values, e.g., intensities.
    - alpha (float, optional): A scaling factor for the threshold. Defaults to `alpha_max`.

    Returns:
    - numpy.ndarray: Array of positions where corresponding values in `v` exceed the threshold.
    """
    if len(v):
        idx = (v >= alpha*np.max(v))
        return np.array(pos[idx])
    else:
        return np.array([], dtype=np.int32)


def analyse_celldata(cell_data, config):
    """
    Analyse the provided cell data to extract focus-related information.

    Args:
        cell_data (CellData): An instance of the CellData class containing path data information.
        config (dict): Configuration dictionary containing 'peak_threshold' and 'threshold_type'
            'peak_threshold' (float) - threshold for calling peaks as foci
            'threshold_type' (str) - either 'per-trace' or 'per-cell'

    Returns:
        tuple: A tuple containing:
            - foci_abs_intensity (list): List of absolute intensities for the detected foci.
            - foci_pos (list): List of absolute positions of the detected foci.
            - foci_pos_index (list): List of indices of the detected foci.
            - screened_foci_data (list): List of RemovedPeakData indicating positions of removed peaks and the index of the larger peak.
            - trace_median_intensities (list): Per-trace median intensity.
            - trace_thresholds (list): Per-trace absolute threshold for calling peaks as foci.
    """
    foci_abs_intensity = []
    foci_pos = []
    foci_pos_index = []
    screened_foci_data = []
    trace_median_intensities = []
    trace_thresholds = []

    peak_threshold = config['peak_threshold']

    threshold_type = config['threshold_type']

    if threshold_type == 'per-trace':
        """
        Call extracted peaks as foci if intensity - trace_mean > peak_threshold * (trace_max_foci_intensity - trace_mean)
        """
        for path_data in cell_data.pathdata_list:
            peaks = np.array(path_data.peaks, dtype=np.int32)

            # Normalize extracted fluorescent intensities by subtracting mean (and dividing
            # by standard deviation - note that the latter should have no effect on the results).
            h = np.array(path_data.o_intensity)
            h = h - np.mean(h)
            h = h/np.std(h)
            # Extract foci according to criterion
            foci_idx = focus_criterion(peaks, h[peaks], peak_threshold)

            removed_peaks = path_data.removed_peaks
            removed_peaks_idx = np.array([u.idx for u in removed_peaks], dtype=np.int32)

            if len(peaks):
                trace_thresholds.append((1-peak_threshold)*np.mean(path_data.o_intensity) + peak_threshold*np.max(np.array(path_data.o_intensity)[peaks]))
            else:
                trace_thresholds.append(None)

            if len(removed_peaks):
                if len(peaks):
                    threshold = (1-peak_threshold)*np.mean(path_data.o_intensity) + peak_threshold*np.max(np.array(path_data.o_intensity)[peaks])
                else:
                    threshold = float('-inf')

                removed_peak_heights = np.array(path_data.o_intensity)[removed_peaks_idx]
                screened_foci_idx = np.where(removed_peak_heights > threshold)[0]

                screened_foci_data.append([removed_peaks[i] for i in screened_foci_idx])
            else:
                screened_foci_data.append([])

            pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
            foci_pos.append(pos_abs)
            foci_abs_intensity.append(np.array(path_data.o_intensity)[foci_idx])

            foci_pos_index.append(foci_idx)
            trace_median_intensities.append(np.median(path_data.o_intensity))

    elif threshold_type == 'per-cell':
        """
        Call extracted peaks as foci if intensity - trace_mean > peak_threshold * max(intensity - trace_mean)
        """
        max_cell_intensity = float("-inf")
        for path_data in cell_data.pathdata_list:

            # Normalize extracted fluorescent intensities by subtracting mean (and dividing
            # by standard deviation - note that the latter should have no effect on the results).
            h = np.array(path_data.o_intensity)
            h = h - np.mean(h)
            max_cell_intensity = max(max_cell_intensity, np.max(h))

        for path_data in cell_data.pathdata_list:
            peaks = np.array(path_data.peaks, dtype=np.int32)

            # Normalize extracted fluorescent intensities by subtracting mean (and dividing
            # by standard deviation - note that the latter should have no effect on the results).
            h = np.array(path_data.o_intensity)
            h = h - np.mean(h)

            foci_idx = peaks[h[peaks] > peak_threshold*max_cell_intensity]

            removed_peaks = path_data.removed_peaks
            removed_peaks_idx = np.array([u.idx for u in removed_peaks], dtype=np.int32)

            trace_thresholds.append(np.mean(path_data.o_intensity) + peak_threshold*max_cell_intensity)

            if len(removed_peaks):
                threshold = np.mean(path_data.o_intensity) + peak_threshold*max_cell_intensity

                removed_peak_heights = np.array(path_data.o_intensity)[removed_peaks_idx]
                screened_foci_idx = np.where(removed_peak_heights > threshold)[0]

                screened_foci_data.append([removed_peaks[i] for i in screened_foci_idx])
            else:
                screened_foci_data.append([])

            pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
            foci_pos.append(pos_abs)
            foci_abs_intensity.append(np.array(path_data.o_intensity)[foci_idx])

            foci_pos_index.append(foci_idx)
            trace_median_intensities.append(np.median(path_data.o_intensity))

    else:
        raise NotImplementedError

    return foci_abs_intensity, foci_pos, foci_pos_index, screened_foci_data, trace_median_intensities, trace_thresholds


def analyse_traces(all_paths, path_lengths, measured_trace_fluorescence, config):

    cd = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence, dmin=config['screening_distance'])

    return analyse_celldata(cd, config)

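
A small worked example of the per-trace focus-calling pipeline implemented above; the trace values are invented purely for illustration:

import numpy as np
from path_analysis.data_preprocess import find_peaks2, focus_criterion

trace = np.array([10.0, 11, 30, 12, 11, 18, 10, 50, 11, 10])
norm = (trace - trace.mean()) / trace.std()

# Candidate peaks in the normalized trace, as in process_cell_traces
peaks, _ = find_peaks2(norm, distance=5, prominence=0.5 * np.std(norm))

# Keep only candidates within peak_threshold (0.4) of the brightest peak, as in analyse_celldata
foci = focus_criterion(np.array(peaks), norm[np.array(peaks)], alpha=0.4)
print(peaks, foci)  # candidate peaks at indices 2 and 7; only the brighter one at index 7 is called a focus
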
requirements.txt ADDED
@@ -0,0 +1,7 @@
pillow
tifffile
matplotlib
numpy
lxml
pandas
scipy
setup.py ADDED
@@ -0,0 +1,29 @@
from setuptools import setup, find_packages

setup(
    name='path_analysis',
    version='0.1.0',
    description='A brief description of your package',
    author='Your Name',
    author_email='youremail@example.com',
    url='https://github.com/yourusername/yourrepository',  # if you have a repo for the project
    packages=find_packages(),  # or specify manually: ['your_package', 'your_package.submodule', ...]
    install_requires=[
        'numpy',  # for example, if your package needs numpy
        'gradio',
        # ... other dependencies
    ],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        # ... other classifiers
    ],
    python_requires='>=3.6',  # your project's Python version requirement
    keywords='some keywords related to your project',
    # ... other parameters
)
tests/__init__.py ADDED
File without changes
tests/test_analyse.py ADDED
@@ -0,0 +1,370 @@
import pytest
from path_analysis.analyse import *
from path_analysis.data_preprocess import RemovedPeakData
import numpy as np
import pandas as pd
from math import pi
import xml.etree.ElementTree as ET
from PIL import ImageChops


def test_draw_paths_no_error():
    all_paths = [[[0, 0], [1, 1]], [[2, 2], [3, 3]]]
    foci_stack = np.zeros((5, 5, 5))
    foci_stack[0, 0, 0] = 1.0
    foci_index = [[0], [1]]
    r = 3

    try:
        im = draw_paths(all_paths, foci_stack, foci_index, r)
    except Exception as e:
        pytest.fail(f"draw_paths raised an exception: {e}")


def test_draw_paths_image_size():
    all_paths = [[[0, 0], [1, 1]], [[2, 2], [3, 3]]]
    foci_stack = np.zeros((5, 5, 5))
    foci_stack[0, 0, 0] = 1.0

    foci_index = [[0], [1]]
    r = 3

    im = draw_paths(all_paths, foci_stack, foci_index, r)
    assert im.size == (5, 5), f"Expected image size (5, 5), got {im.size}"


def test_draw_paths_image_modified():
    all_paths = [[[0, 0], [1, 1]], [[2, 2], [3, 3]]]
    foci_stack = np.zeros((5, 5, 5))
    foci_stack[0, 0, 0] = 1.0
    foci_index = [[0], [1]]
    r = 3

    im = draw_paths(all_paths, foci_stack, foci_index, r)
    blank_image = Image.new("RGB", (5, 5), "black")

    # Check if the image is not entirely black (i.e., has been modified)
    diff = ImageChops.difference(im, blank_image)
    assert diff.getbbox() is not None, "The image has not been modified"


def test_calculate_path_length_partials_default_voxel():
    point_list = [(0, 0, 0), (1, 0, 0), (1, 1, 1)]
    expected_result = np.array([0.0, 1.0, 1.0+np.sqrt(2)])
    result = calculate_path_length_partials(point_list)
    np.testing.assert_allclose(result, expected_result, atol=1e-5)


def test_calculate_path_length_partials_custom_voxel():
    point_list = [(0, 0, 0), (1, 0, 0), (1, 1, 0)]
    voxel_size = (1, 2, 1)
    expected_result = np.array([0.0, 1.0, 3.0])
    result = calculate_path_length_partials(point_list, voxel_size=voxel_size)
    np.testing.assert_allclose(result, expected_result, atol=1e-5)


def test_calculate_path_length_partials_single_point():
    point_list = [(0, 0, 0)]
    expected_result = np.array([0.0])
    result = calculate_path_length_partials(point_list)
    np.testing.assert_allclose(result, expected_result, atol=1e-5)


def test_get_paths_from_traces_file():
    # Mock the XML traces file content
    xml_content = '''<?xml version="1.0"?>
    <root>
        <path reallength="5.0">
            <point x="1" y="2" z="3"/>
            <point x="4" y="5" z="6"/>
        </path>
        <path reallength="10.0">
            <point x="7" y="8" z="9"/>
            <point x="10" y="11" z="12"/>
        </path>
    </root>
    '''

    # Create a temporary XML file
    with open("temp_traces.xml", "w") as f:
        f.write(xml_content)

    all_paths, path_lengths = get_paths_from_traces_file("temp_traces.xml")

    expected_paths = [[(1, 2, 3), (4, 5, 6)], [(7, 8, 9), (10, 11, 12)]]
    expected_lengths = [5.0, 10.0]

    assert all_paths == expected_paths, f"Expected paths {expected_paths}, but got {all_paths}"
    assert path_lengths == expected_lengths, f"Expected lengths {expected_lengths}, but got {path_lengths}"

    # Clean up temporary file
    import os
    os.remove("temp_traces.xml")


def test_measure_chrom2():
    # Mock data
    path = [(2, 3, 4), (4, 5, 6), (9, 9, 9)]  # Sample ordered path points
    intensity = np.random.rand(10, 10, 10)  # Random 3D fluorescence data
    config = {
        'z_res': 1,
        'xy_res': 0.5,
        'sphere_radius': 2.5
    }

    # Function call
    _, measurements, measurements_max = measure_chrom2(path, intensity, config)

    # Assertions
    assert len(measurements) == len(path), "Measurements length should match path length"
    assert len(measurements_max) == len(path), "Max measurements length should match path length"
    assert all(0 <= val <= 1 for val in measurements), "All mean measurements should be between 0 and 1 for this mock data"
    assert all(0 <= val <= 1 for val in measurements_max), "All max measurements should be between 0 and 1 for this mock data"


def test_measure_chrom2_z():
    # Mock data
    path = [(2, 3, 4), (4, 5, 6)]  # Sample ordered path points
    _, _, intensity = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
    config = {
        'z_res': 1,
        'xy_res': 0.5,
        'sphere_radius': 2.5
    }

    # Function call
    _, measurements, measurements_max = measure_chrom2(path, intensity, config)

    # Assertions
    assert len(measurements) == len(path), "Measurements length should match path length"
    assert len(measurements_max) == len(path), "Max measurements length should match path length"
    assert all(measurements == np.array([4, 6]))
    assert all(measurements_max == np.array([6, 8]))


def test_measure_chrom2_z2():
    # Mock data
    path = [(0, 0, 0), (2, 3, 4), (4, 5, 6)]  # Sample ordered path points
    _, _, intensity = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
    config = {
        'z_res': 0.25,
        'xy_res': 0.5,
        'sphere_radius': 2.5
    }

    # Function call
    _, measurements, measurements_max = measure_chrom2(path, intensity, config)

    # Assertions
    assert len(measurements) == len(path), "Measurements length should match path length"
    assert len(measurements_max) == len(path), "Max measurements length should match path length"
    assert all(measurements_max == np.array([9, 9, 9]))


def test_measure_from_mask():
    mask = np.array([
        [0, 1, 0],
        [1, 1, 1],
        [0, 1, 0]
    ])
    measure_stack = np.array([
        [2, 4, 2],
        [4, 8, 4],
        [2, 4, 2]
    ])
    result = measure_from_mask(mask, measure_stack)
    assert result == 24  # Expected sum: 4+4+8+4+4


def test_max_from_mask():
    mask = np.array([
        [0, 1, 0],
        [1, 1, 1],
        [0, 1, 0]
    ])
    measure_stack = np.array([
        [2, 5, 2],
        [4, 8, 3],
        [2, 7, 2]
    ])
    result = max_from_mask(mask, measure_stack)
    assert result == 8  # Expected max: 8


def test_measure_at_point_mean():
    measure_stack = np.array([
        [[2, 2, 2, 0], [4, 4, 6, 0], [3, 3, 2, 0], [0, 0, 0, 0]],
        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
    ])
    p = (1, 1, 1)
    melem = np.ones((3, 3, 3))
    result = measure_at_point(p, melem, measure_stack, op='mean')
    assert result == 4, "Expected mean: 4"


def test_measure_at_point_mean_off1():
    measure_stack = np.array([
        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
    ])
    p = (0, 0, 0)
    melem = np.ones((3, 3, 3))
    result = measure_at_point(p, melem, measure_stack, op='mean')
    assert result == 4.5, "Expected mean: 4.5"


def test_measure_at_point_mean_off2():
    measure_stack = np.array([
        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
    ])
    p = (3, 1, 1)
    melem = np.ones((3, 3, 3))
    print(measure_stack[p[0], p[1], p[2]])

    result = measure_at_point(p, melem, measure_stack, op='mean')
    assert result == 32/18  # Expected mean: 32/18


def test_measure_at_point_mean_off3():
    measure_stack = np.array([
        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
    ])
    p = (3, 1, 1)
    melem = np.ones((1, 1, 3))
    print(measure_stack[p[0], p[1], p[2]])

    result = measure_at_point(p, melem, measure_stack, op='mean')
    assert result == 0, "Expected mean: 0"


def test_measure_at_point_mean_off4():
    measure_stack = np.array([
        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
    ])
    p = (3, 1, 1)
    melem = np.ones((3, 1, 1))
    print(measure_stack[p[0], p[1], p[2]])

    result = measure_at_point(p, melem, measure_stack, op='mean')
    assert result == 3, "Expected mean: 3"


def test_measure_at_point_max():
    measure_stack = np.array([
        [[2, 2, 2], [4, 4, 4], [2, 2, 2]],
        [[4, 5, 4], [8, 7, 9], [4, 4, 4]],
        [[2, 2, 2], [4, 4, 4], [2, 2, 2]]
    ])
    p = (1, 1, 1)
    melem = np.ones((3, 3, 3))
    result = measure_at_point(p, melem, measure_stack, op='max')
    assert result == 9, "Expected max: 9"


def test_make_sphere_equal():
    R = 5
    z_scale_ratio = 1.0

    sphere = make_sphere(R, z_scale_ratio)

    # Check the returned type
    assert isinstance(sphere, np.ndarray), "Output should be a numpy ndarray"

    # Check the shape
    expected_shape = (2*R+1, 2*R+1, 2*R+1)
    assert sphere.shape == expected_shape, f"Expected shape {expected_shape}, but got {sphere.shape}"

    assert (sphere[:, :, ::-1] == sphere).all(), "Expected symmetrical mask"
    assert (sphere[:, ::-1, :] == sphere).all(), "Expected symmetrical mask"
    assert (sphere[::-1, :, :] == sphere).all(), "Expected symmetrical mask"
    assert abs(np.sum(sphere)-4/3*pi*R**3) < 10, "Expected approximate volume to be correct"
    assert (sphere[R, R, 0] == 1), "Expected centre point on top plane to be within sphere"
    assert (sphere[R+1, R, 0] == 0), "Expected point next to centre on top plane to be outside sphere"


def test_extract_peaks_basic():
    cell_id = 1  # Simple per-cell tag
    all_paths = [[[0, 0, 0], [1, 1, 0]]]  # Single, simple path
    path_lengths = [1.41]  # Length of the above path
    measured_traces = [[100, 200]]  # Fluorescence along the path
    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type': 'per-cell', 'use_corrected_positions': True, 'screening_distance': 10}

    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)

    assert len(df) == 1, "Expected one row in DataFrame"
    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
    assert list(df['Trace_foci_number']) == [1], "Wrong foci number"
    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
    assert foci_pos_index == [[1]]
    assert foci_absolute_intensity == [[200]]
    assert screened_foci_data == [[]]
    assert trace_thresholds == [[150+0.4*50]]
    assert np.all(trace_positions[0] == np.array([0, np.sqrt(2)]))


def test_extract_peaks_multiple_paths():
    cell_id = 1
    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 200], [2, 2, 200]]]
    path_lengths = [1.41, 1.41]
    measured_traces = [[100, 200], [100, 140]]
    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type': 'per-trace', 'use_corrected_positions': True, 'screening_distance': 10}

    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)

    assert len(df) == 2, "Expected two rows in DataFrame"
    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
    assert list(df['Trace_foci_number']) == [1, 1], "Wrong foci number"
    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
    print(foci_pos_index)
    assert list(map(list, foci_pos_index)) == [[1], [1]]
    assert list(map(list, foci_absolute_intensity)) == [[200], [140]]
    assert trace_thresholds == [150+0.4*50, 120+0.4*20]
    assert np.all(trace_positions[0] == np.array([0, np.sqrt(2)]))
    assert screened_foci_data == [[], []]


def test_extract_peaks_multiple_paths_screened():
    cell_id = 1
    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 2], [2, 2, 2]]]
    path_lengths = [1.41, 1.41]
    measured_traces = [[100, 200], [100, 150]]
    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type': 'per-trace', 'use_corrected_positions': True, 'screening_distance': 10}

    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)

    assert len(df) == 2, "Expected two rows in DataFrame"
    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
    assert list(df['Trace_foci_number']) == [1, 0], "Wrong foci number"
    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
    print(foci_pos_index)
    assert list(map(list, foci_pos_index)) == [[1], []]
    assert list(map(list, foci_absolute_intensity)) == [[200], []]
    assert trace_thresholds == [150+0.4*50, None]
    assert np.all(trace_positions[0] == np.array([0, np.sqrt(2)]))
    assert screened_foci_data == [[], [RemovedPeakData(idx=1, screening_peak=(0, 1))]]


def test_extract_peaks_multiple_paths_per_cell():
    cell_id = 1
    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 200], [2, 2, 200]]]
    path_lengths = [1.41, 1.41]
    measured_traces = [[100, 200], [100, 140]]
    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type': 'per-cell', 'use_corrected_positions': True, 'screening_distance': 10}

    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)

    assert len(df) == 2, "Expected two rows in DataFrame"
    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
    assert list(df['Trace_foci_number']) == [1, 0], "Wrong foci number"
    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
    assert list(map(list, foci_pos_index)) == [[1], []]
    assert list(map(list, foci_absolute_intensity)) == [[200], []]
    assert trace_thresholds == [150+0.4*50, 120+0.4*50]
    assert np.all(trace_positions[0] == np.array([0, np.sqrt(2)]))
    assert screened_foci_data == [[], []]

@@ -0,0 +1,150 @@
 
+ from path_analysis.data_preprocess import *
+ import numpy as np
+ import pytest
+
+
+ def test_thin_points():
+     # Define a sample point list
+     points = [
+         PeakData([0, 0, 0], 10, 0),
+         PeakData([1, 1, 1], 8, 1),
+         PeakData([10, 10, 10], 12, 2),
+         PeakData([10.5, 10.5, 10.5], 5, 3),
+         PeakData([20, 20, 20], 15, 4)
+     ]
+
+     # Call the thin_peaks function with dmin=5 (for example)
+     removed_indices = thin_peaks(points, dmin=5)
+
+     # Check results
+     # The point at index 1 ([1, 1, 1]) should be removed: it lies within 5 units of the point at index 0 and has lower intensity.
+     # Similarly, the point at index 3 ([10.5, 10.5, 10.5]) should be removed, as it is close to the point at index 2 and has lower intensity.
+     assert set(removed_indices) == {1, 3}
+
+     # Another simple test to check that the function does nothing when points are far apart
+     far_points = [
+         PeakData([0, 0, 0], 10, 0),
+         PeakData([100, 100, 100], 12, 1),
+         PeakData([200, 200, 200], 15, 2)
+     ]
+
+     removed_indices_far = thin_peaks(far_points, dmin=5)
+     assert len(removed_indices_far) == 0  # Expect no points to be removed
+
+
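
The comments in test_thin_points describe the behaviour the assertions rely on: a peak is discarded when a brighter peak lies within dmin of it. The sketch below is a minimal illustration of that rule; it works on plain (position, intensity, index) tuples rather than the real PeakData objects, and it is not the thin_peaks implementation from path_analysis.data_preprocess.

    import numpy as np

    def thin_peaks_sketch(peaks, dmin=5.0):
        """Indices of peaks that have a strictly brighter peak within dmin."""
        removed = []
        for pos_p, int_p, idx_p in peaks:
            for pos_q, int_q, _ in peaks:
                dist = np.linalg.norm(np.asarray(pos_p) - np.asarray(pos_q))
                if 0 < dist < dmin and int_q > int_p:
                    removed.append(idx_p)  # p is dominated by the brighter q
                    break
        return removed

With tuples matching the first points list above, this returns [1, 3], in agreement with the assertion.
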
+ def test_find_peaks2():
+
+     # Basic test
+     data = np.array([0, 0, 0, 0, 0, 0, 5, 0, 3, 0])
+     peaks, _ = find_peaks2(data)
+     assert set(peaks) == {6}  # Expected peak at position 6
+
+     # Basic test
+     data = np.array([0, 2, 0, 0, 0, 0, 0, 0, 0, 0])
+     peaks, _ = find_peaks2(data)
+     assert set(peaks) == {1}  # Expected peak at position 1
+
+
+     # Test with padding impacting peak detection
+     data = np.array([3, 2.9, 0, 0, 0, 3])
+     peaks, _ = find_peaks2(data)
+     assert set(peaks) == {0, 5}  # Peaks at both ends
+
+     # Test with close peaks
+     data = np.array([3, 0, 3])
+     peaks, _ = find_peaks2(data)
+     assert set(peaks) == {2}  # Peak at right end only
+
+
+
+     # Test with close peaks and an explicit minimum distance
+     data = np.array([3, 0, 3])
+     peaks, _ = find_peaks2(data, distance=1)
+     assert set(peaks) == {0, 2}  # Peaks at both ends
+
+     # Test with plateaus of equal maximum values
+     data = np.array([0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3])
+     peaks, _ = find_peaks2(data, distance=1)
+     assert set(peaks) == {2, 7}  # Peak at centre (rounded to the left) of each group of maximum values
+
+     # Test with prominence threshold
+     data = np.array([0, 1, 0, 0.4, 0])
+     peaks, _ = find_peaks2(data, prominence=0.5)
+     assert peaks == [1]  # Only the peak at position 1 meets the prominence threshold
+
+
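
The comments above mention padding and a distance argument, so find_peaks2 appears to behave like scipy.signal.find_peaks applied to a padded copy of the signal, which lets maxima at the array ends be reported. The sketch below is one plausible reading of that behaviour; the zero padding and the default distance of 5 are assumptions inferred from the assertions, not the actual find_peaks2 code or defaults.

    import numpy as np
    from scipy.signal import find_peaks

    def find_peaks2_sketch(data, distance=5, prominence=None):
        """Detect peaks after padding, so end-of-array maxima count as peaks."""
        padded = np.pad(np.asarray(data, dtype=float), 1)  # one zero at each end
        peaks, properties = find_peaks(padded, distance=distance, prominence=prominence)
        return peaks - 1, properties

Under these assumptions the sketch reproduces, for example, {6} for the first array and {0, 5} for the padded-ends case, but the real defaults in find_peaks2 may differ.
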
+ def test_focus_criterion():
+     pos = np.array([0, 1, 2, 3, 4, 6])
+     values = np.array([0.1, 0.5, 0.2, 0.8, 0.3, 0.9])
+
+     # Basic test
+     assert np.array_equal(focus_criterion(pos, values), np.array([1, 3, 6]))  # values 0.5, 0.8 and 0.9 are at least 0.4 times the max (0.9)
+
+     # Empty test
+     assert np.array_equal(focus_criterion(np.array([]), np.array([])), np.array([]))
+
+     # Test with custom alpha
+     assert np.array_equal(focus_criterion(pos, values, alpha=0.5), np.array([1, 3, 6]))
+
+     # Test with a larger alpha
+     assert np.array_equal(focus_criterion(pos, values, alpha=1.0), [6])  # only the maximum value itself meets the threshold
+
+     # Test with uniformly low values
+     values = np.array([0.1, 0.2, 0.3, 0.4])
+
+     assert np.array_equal(focus_criterion(pos[:4], values), [1, 2, 3])  # values 0.2, 0.3 and 0.4 are at least 0.4 times the max (0.4), i.e. 0.16
+
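
All of the focus_criterion assertions are consistent with a simple relative-intensity cut: keep the positions whose value is at least alpha times the maximum value. The sketch below reproduces that behaviour, including the >= comparison needed for the alpha=1.0 case and the empty-input case; the default alpha of 0.4 is inferred from the first assertion, and none of this is the implementation in path_analysis.data_preprocess.

    import numpy as np

    def focus_criterion_sketch(pos, values, alpha=0.4):
        """Keep positions whose value is at least alpha * max(values)."""
        pos = np.asarray(pos)
        values = np.asarray(values)
        if len(values) == 0:
            return np.array([])  # matches the empty-input assertion
        return pos[values >= alpha * values.max()]

With the pos and values arrays above and the assumed default alpha, this yields [1, 3, 6].
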
+ @pytest.fixture
+ def mock_data():
+     all_paths = [[(0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], [(1,20,0), (1,20,10), (1,20,20)]]  # Mock paths
+     path_lengths = [2.2, 2.3]  # Mock path lengths
+     measured_trace_fluorescence = [[100, 8, 3, 2, 3, 49], [38, 2, 20]]  # Mock fluorescence data
+     return all_paths, path_lengths, measured_trace_fluorescence
+
+ def test_process_cell_traces_return_type(mock_data):
+     all_paths, path_lengths, measured_trace_fluorescence = mock_data
+     result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+     assert isinstance(result, CellData), f"Expected CellData but got {type(result)}"
+
+ def test_process_cell_traces_pathdata_list_length(mock_data):
+     all_paths, path_lengths, measured_trace_fluorescence = mock_data
+     result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+     assert len(result.pathdata_list) == len(all_paths), f"Expected {len(all_paths)} but got {len(result.pathdata_list)}"
+
+ def test_process_cell_traces_pathdata_path_lengths(mock_data):
+     all_paths, path_lengths, measured_trace_fluorescence = mock_data
+     result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+     path_lengths = [p.SC_length for p in result.pathdata_list]
+     expected_path_lengths = [2.2, 2.3]
+     assert path_lengths == expected_path_lengths, f"Expected {expected_path_lengths} but got {path_lengths}"
+
+ def test_process_cell_traces_peaks(mock_data):
+     all_paths, path_lengths, measured_trace_fluorescence = mock_data
+     result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+     print(result)
+     peaks = [p.peaks for p in result.pathdata_list]
+     assert peaks == [[0, 5], []]
+
+ # Mock data
+ @pytest.fixture
+ def mock_celldata():
+     pathdata1 = PathData(peaks=[0, 5], points=[(0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], removed_peaks=[], o_intensity=[100, 8, 3, 2, 3, 69], SC_length=2.2)
+     pathdata2 = PathData(peaks=[2], points=[(1,20,0), (1,20,10), (1,20,20)], removed_peaks=[RemovedPeakData(0, (0,5))], o_intensity=[38, 2, 20], SC_length=2.3)
+     return CellData(pathdata_list=[pathdata1, pathdata2])
+
+ def test_analyse_celldata(mock_celldata):
+     data_frame, foci_absolute_intensity, foci_position_index, dominated_foci_data, trace_median_intensity, trace_thresholds = analyse_celldata(mock_celldata, {'peak_threshold': 0.4, 'threshold_type': 'per-trace'})
+     assert len(data_frame) == len(mock_celldata.pathdata_list), "Mismatch in dataframe length"
+     assert len(foci_absolute_intensity) == len(mock_celldata.pathdata_list), "Mismatch in absolute intensities length"
+     assert len(foci_position_index) == len(mock_celldata.pathdata_list), "Mismatch in position indices length"
+
+     assert list(map(list, foci_position_index)) == [[0, 5], [2]]
+
+
+ def test_analyse_celldata_per_cell(mock_celldata):
+     data_frame, foci_absolute_intensity, foci_position_index, dominated_foci_data, trace_median_intensity, trace_thresholds = analyse_celldata(mock_celldata, {'peak_threshold': 0.4, 'threshold_type': 'per-cell'})
+     assert len(data_frame) == len(mock_celldata.pathdata_list), "Mismatch in dataframe length"
+     assert len(foci_absolute_intensity) == len(mock_celldata.pathdata_list), "Mismatch in absolute intensities length"
+     assert len(foci_position_index) == len(mock_celldata.pathdata_list), "Mismatch in position indices length"
+     assert list(map(list, foci_position_index)) == [[0, 5], []]
+
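
The only difference between the last two tests is the threshold_type entry of the config passed to analyse_celldata: under 'per-trace' the dim focus at index 2 on the weaker second trace is retained, while under 'per-cell' it falls below the cell-wide threshold and the expected positions become [[0, 5], []]. A minimal way to express the two configurations used above (illustrative only):

    base_config = {'peak_threshold': 0.4}
    per_trace_config = {**base_config, 'threshold_type': 'per-trace'}  # expects [[0, 5], [2]]
    per_cell_config = {**base_config, 'threshold_type': 'per-cell'}    # expects [[0, 5], []]
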
tests/test_results.py ADDED
@@ -0,0 +1,150 @@
+
+ import pytest
+ from path_analysis.analyse import *
+ from path_analysis.data_preprocess import RemovedPeakData
+ import numpy as np
+ from math import pi
+ import xml.etree.ElementTree as ET
+ from PIL import ImageChops
+
+ from pathlib import Path
+
+ import matplotlib
+ matplotlib.use('Agg')
+
+ @pytest.fixture(scope="module")
+ def script_loc(request):
+     '''Return the directory of the currently running test script'''
+
+     return Path(request.fspath).parent
+
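
Path(request.fspath) wraps pytest's legacy py.path-based attribute, which works but is considered legacy; on pytest 7 and later the fixture could equivalently use the pathlib-based request.path. A possible alternative, not part of this commit:

    import pytest

    @pytest.fixture(scope="module")
    def script_loc(request):
        '''Return the directory of the currently running test script'''
        # request.path is a pathlib.Path on pytest >= 7; request.fspath also still works.
        return request.path.parent
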
+ def test_image_1(script_loc):
+
+     config = {'sphere_radius': 0.1984125,
+               'peak_threshold': 0.4,
+               'xy_res': 0.0396825,
+               'z_res': 0.0909184,
+               'threshold_type': 'per-cell',
+               'use_corrected_positions': True,
+               'screening_distance': 10,
+               }
+
+     data_loc = script_loc.parent.parent / 'test_data' / 'hei10 ++ 15.11.19 p22s2 image 9'
+
+
+     image_input = data_loc / 'HEI10.tif'
+     path_input = data_loc / 'SNT_Data.traces'
+
+     paths, traces, fig, extracted_peaks = analyse_paths('Cell', image_input, path_input, config)
+
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], [61.47, 70.40, 51.93, 43.94, 62.24], atol=1e-2)
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], extracted_peaks['Measured_trace_length(um)'], atol=1e-8)
+     assert list(extracted_peaks['Trace_foci_number']) == [2, 3, 2, 2, 3]
+
+ def test_image_2(script_loc):
+
+     config = {'sphere_radius': 0.1984125,
+               'peak_threshold': 0.4,
+               'xy_res': 0.0396825,
+               'z_res': 0.0909184,
+               'threshold_type': 'per-cell',
+               'use_corrected_positions': True,
+               'screening_distance': 10,
+               }
+
+     data_loc = script_loc.parent.parent / 'test_data' / 'z-optimised'
+
+
+     image_input = data_loc / 'HEI10.tif'
+     path_input = data_loc / 'ZYP1.traces'
+
+     paths, traces, fig, extracted_peaks = analyse_paths('Cell', image_input, path_input, config)
+
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], extracted_peaks['Measured_trace_length(um)'], atol=1e-8)
+     assert list(extracted_peaks['Trace_foci_number']) == [2, 2, 1, 2, 1]
+
+ def test_image_3(script_loc):
+
+     config = {'sphere_radius': 0.1984125,
+               'peak_threshold': 0.4,
+               'xy_res': 0.0396825,
+               'z_res': 0.1095510,
+               'threshold_type': 'per-trace',
+               'use_corrected_positions': True,
+               'screening_distance': 10,
+
+               }
+
+     data_loc = script_loc.parent.parent / 'test_data' / 'arenosa SN A1243 image 18-20230726T142725Z-001' / 'arenosa SN A1243 image 18'
+
+
+     image_input = data_loc / 'HEI10.tif'
+     path_input = data_loc / 'SNT_Data.traces'
+
+     paths, traces, fig, extracted_peaks = analyse_paths('Cell', image_input, path_input, config)
+
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], extracted_peaks['Measured_trace_length(um)'], atol=1e-8)
+     assert list(extracted_peaks['Trace_foci_number']) == [2, 1, 1, 1, 2, 1, 1, 1]
+
+ def test_image_4(script_loc):
+
+     config = {'sphere_radius': 10.,
+               'peak_threshold': 0.4,
+               'xy_res': 1,
+               'z_res': 1,
+               'threshold_type': 'per-trace',
+               'use_corrected_positions': True,
+               'screening_distance': 10,
+
+               }
+
+     data_loc = script_loc.parent.parent / 'test_data' / 'mammalian 2D-20230821T180708Z-001' / 'mammalian 2D' / '1'
+
+
+     image_input = data_loc / 'C2-Pachytene SIM-1.tif'
+     path_input = data_loc / 'SNT_Data.traces'
+
+     paths, traces, fig, extracted_peaks = analyse_paths('Cell', image_input, path_input, config)
+
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], extracted_peaks['Measured_trace_length(um)'], atol=1e-8)
+
+     valid_results = [{1}, {1}, {2, 3}, {1, 2}, {1, 2}, {1}, {1}, {2}, {1}, {1}, {1, 2}, {1}, {1, 2}, {1, 2}, {1}, {1}, {1}, {1}, {1}]
+     measured = extracted_peaks['Trace_foci_number']
+
+     print(measured)
+     assert len(measured) == len(valid_results)
+     assert all(m in v for m, v in zip(measured, valid_results))
+
+
+
+ def test_image_5(script_loc):
+
+     config = {'sphere_radius': 0.3,
+               'peak_threshold': 0.4,
+               'xy_res': 0.1023810,
+               'z_res': 1,
+               'threshold_type': 'per-trace',
+               'use_corrected_positions': True,
+               'screening_distance': 10,
+
+               }
+
+     data_loc = script_loc.parent.parent / 'test_data' / 'mammalian 2D-20230821T180708Z-001' / 'mammalian 2D' / '2'
+
+
+     image_input = data_loc / 'C1-CNTD1FHFH CSHA 1in5000 22612 Slide 6-102-1.tif'
+     path_input = data_loc / 'SNT_Data.traces'
+
+     paths, traces, fig, extracted_peaks = analyse_paths('Cell', image_input, path_input, config)
+
+     assert np.allclose(extracted_peaks['SNT_trace_length(um)'], extracted_peaks['Measured_trace_length(um)'], atol=1e-8)
+
+     valid_results = [1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1]
+     measured = extracted_peaks['Trace_foci_number']
+
+     assert list(measured) == valid_results
+
+
+
+
+
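
These five tests are integration tests against image data that is not part of the repository: they resolve a test_data directory two levels above the tests folder. If that data is absent they fail at the file-loading stage; a module-level guard along the following lines (illustrative, not part of this commit) would skip them instead:

    import pytest
    from pathlib import Path

    # Same location the tests compute via script_loc.parent.parent / 'test_data'.
    _DATA_ROOT = Path(__file__).parent.parent.parent / 'test_data'
    pytestmark = pytest.mark.skipif(not _DATA_ROOT.exists(),
                                    reason="test_data directory not available")
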