Spaces:

JFoz
/

test_path_analysis

Sleeping

App Files Files Community

foz commited on Aug 22, 2023

Commit

0dab632

1 Parent(s): 020d824

Add more options and tests

Browse files

Files changed (8) hide show

app.py +39 -9
path_analysis/__init__.py +0 -0
path_analysis/analyse.py +375 -0
path_analysis/data_preprocess.py +314 -0
setup.py +29 -0
tests/__init__.py +0 -0
tests/test_analyse.py +223 -0
tests/test_preprocess.py +139 -0

app.py CHANGED Viewed

@@ -2,20 +2,19 @@
 import gradio as gr
 from tifffile import imread
 from PIL import Image
-import matplotlib.pyplot as plt
-from analyse import analyse_paths
 import numpy as np
-def process(cell_id, foci_file, traces_file):
-    paths, traces, fig, extracted_peaks = analyse_paths(cell_id, foci_file.name, traces_file.name)
-    extracted_peaks.to_csv('tmp')
-    return paths, [Image.fromarray(im) for im in traces], fig, extracted_peaks, 'tmp'
 def preview_image(file1):
     if file1:
         im = imread(file1.name)
-        print(im.shape)
-        return Image.fromarray(np.max(im, axis=0))
     else:
         return None
@@ -23,12 +22,27 @@ def preview_image(file1):
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             cellid_input = gr.Textbox(label="Cell ID", placeholder="Image_1")
             image_input = gr.File(label="Input foci image")
             image_preview = gr.Image(label="Max projection of foci image")
             image_input.change(fn=preview_image, inputs=image_input, outputs=image_preview)
             path_input = gr.File(label="SNT traces file")
         with gr.Column():
             trace_output = gr.Image(label="Overlayed paths")
             image_output=gr.Gallery(label="Traced paths")
@@ -36,9 +50,25 @@ with gr.Blocks() as demo:
             data_output=gr.DataFrame(label="Detected peak data")#, "Peak 1 pos", "Peak 1 int"])
             data_file_output=gr.File(label="Output data file (.csv)")
     with gr.Row():
         greet_btn = gr.Button("Process")
-        greet_btn.click(fn=process, inputs=[cellid_input, image_input, path_input], outputs=[trace_output, image_output, plot_output, data_output, data_file_output], api_name="process")
 if __name__ == "__main__":

 import gradio as gr
 from tifffile import imread
 from PIL import Image
+from path_analysis.analyse import analyse_paths
 import numpy as np
+# Build the preview shown for the "Input foci image" upload
 def preview_image(file1):
+    """
+    Return a PIL image previewing the uploaded file, or None when nothing is uploaded.
+    Arrays with more than two dimensions are reduced with a maximum over axis 0 first.
+    """
     if file1:
         im = imread(file1.name)
+        # Debug output: number of dimensions and shape of the loaded array
+        print(im.ndim, im.shape)
+        if im.ndim>2:
+            return Image.fromarray(np.max(im, axis=0))
+        else:
+            return Image.fromarray(im)
     else:
         return None
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
+            # Inputs for cell ID, image, and path
             cellid_input = gr.Textbox(label="Cell ID", placeholder="Image_1")
             image_input = gr.File(label="Input foci image")
             image_preview = gr.Image(label="Max projection of foci image")
             image_input.change(fn=preview_image, inputs=image_input, outputs=image_preview)
             path_input = gr.File(label="SNT traces file")
+            # Additional options wrapped in an accordion for better UI experience
+            with gr.Accordion("Additional options ..."):
+                sphere_radius = gr.Number(label="Trace sphere radius (um)", value=0.1984125, interactive=True)
+                peak_threshold = gr.Number(label="Peak relative threshold", value=0.4, interactive=True)
+                # Resolutions for xy and z axis
+                with gr.Row():
+                    xy_res = gr.Number(label='xy-yesolution (um)', value=0.0396825, interactive=True)
+                    z_res = gr.Number(label='z resolution (um)', value=0.0909184, interactive=True)
+                # Whether the peak threshold is applied per trace or per cell
+                threshold_type = gr.Radio(["per-trace", "per-cell"], label="Threshold-type", value="per-trace", interactive=True)
+        # The output column showing the result of processing
         with gr.Column():
             trace_output = gr.Image(label="Overlayed paths")
             image_output=gr.Gallery(label="Traced paths")
             data_output=gr.DataFrame(label="Detected peak data")#, "Peak 1 pos", "Peak 1 int"])
             data_file_output=gr.File(label="Output data file (.csv)")
+    def process(cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type):
+        """
+        Run the path analysis on the uploaded files and collect the outputs for the UI.
+        Args:
+            cellid_input: Cell identifier passed through to the results table.
+            image_input: Uploaded foci image; its .name is handed to analyse_paths.
+            path_input: Uploaded SNT traces file; its .name is handed to analyse_paths.
+            sphere_radius: Value of the "Trace sphere radius (um)" field.
+            peak_threshold: Value of the "Peak relative threshold" field.
+            xy_res: Value of the xy resolution field (um).
+            z_res: Value of the z resolution field (um).
+            threshold_type: "per-trace" or "per-cell".
+        Returns:
+            tuple: Overlay image, list of per-trace images, figure, peak table and the path of the CSV file.
+        """
+        # extract_peaks reads 'use_corrected_positions' for every detected focus, so the key
+        # has to be present; True reports positions measured along the trace, matching the plot.
+        config = { 'sphere_radius': sphere_radius,
+                   'peak_threshold': peak_threshold,
+                   'xy_res': xy_res,
+                   'z_res': z_res,
+                   'threshold_type': threshold_type,
+                   'use_corrected_positions': True }
+        paths, traces, fig, extracted_peaks = analyse_paths(cellid_input, image_input.name, path_input.name, config)
+        extracted_peaks.to_csv('output.csv')
+        print('extracted', extracted_peaks)
+        return paths, [Image.fromarray(im) for im in traces], fig, extracted_peaks, 'output.csv'
     with gr.Row():
         greet_btn = gr.Button("Process")
+        greet_btn.click(fn=process, inputs=[cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type], outputs=[trace_output, image_output, plot_output, data_output, data_file_output], api_name="process")
 if __name__ == "__main__":

path_analysis/__init__.py ADDED Viewed

File without changes

path_analysis/analyse.py ADDED Viewed

	@@ -0,0 +1,375 @@

+import lxml.etree as ET
+import gzip
+import tifffile
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image, ImageDraw
+import pandas as pd
+from itertools import cycle
+from .data_preprocess import analyse_traces
+import math
+import scipy.linalg as la
+def get_paths_from_traces_file(traces_file):
+    """
+    Read an XML traces file and return the traced paths together with their stored lengths.
+    Args:
+        traces_file (str): Location of the XML traces file.
+    Returns:
+        tuple: (paths, lengths). paths has one entry per 'path' element directly under the root,
+               each a list of integer (x, y, z) tuples taken from its child elements; lengths
+               holds the 'reallength' attribute of each path converted to float.
+    """
+    document_root = ET.parse(traces_file).getroot()
+    all_paths = []
+    path_lengths = []
+    for path_element in document_root.findall('path'):
+        coords = [tuple(int(node.get(axis)) for axis in ('x', 'y', 'z')) for node in path_element]
+        all_paths.append(coords)
+        path_lengths.append(float(path_element.get('reallength')))
+    return all_paths, path_lengths
+def calculate_path_length(point_list, voxel_size=(1,1,1)):
+    """
+    Total length of the polyline through point_list, with each axis scaled by voxel_size.
+    Args:
+        point_list (list): Ordered points of the path.
+        voxel_size (tuple, optional): Per-axis scale applied to coordinate differences. Defaults to (1,1,1).
+    Returns:
+        Sum of the scaled Euclidean segment lengths (0 when there are fewer than two points).
+    """
+    scale = np.array(voxel_size)
+    vertices = [np.array(pt) for pt in point_list]
+    return sum(la.norm(scale * (nxt - cur)) for cur, nxt in zip(vertices, vertices[1:]))
+def calculate_path_length_partials(point_list, voxel_size=(1,1,1)):
+    """
+    Cumulative distance along the polyline through point_list, one value per point.
+    Args:
+        point_list (list): Ordered points of the path.
+        voxel_size (tuple, optional): Per-axis scale applied to coordinate differences. Defaults to (1,1,1).
+    Returns:
+        numpy.ndarray: Running sum of the scaled segment lengths, starting at 0.0 for the first point.
+    """
+    scale = np.array(voxel_size)
+    vertices = [np.array(pt) for pt in point_list]
+    steps = [0.0] + [la.norm(scale * (nxt - cur)) for cur, nxt in zip(vertices, vertices[1:])]
+    return np.cumsum(steps)
+def visualise_ordering(points_list, dim, wr=5, wc=5):
+    """
+    Paint a block around every point on a blank RGB image, coloured by position in the list.
+    The colour moves from green (first point) towards red (last point); later points overwrite earlier ones.
+    Args:
+        points_list (list): Points to draw; the first coordinate is used as column, the second as row.
+        dim (tuple): Three-element shape; the first two entries give the image rows and columns.
+        wr (int, optional): Half-height of the painted block in rows. Defaults to 5.
+        wc (int, optional): Half-width of the painted block in columns. Defaults to 5.
+    Returns:
+        np.array: uint8 image of shape (rows, columns, 3).
+    """
+    n_rows, n_cols, _ = dim
+    canvas = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)
+    total = len(points_list)
+    for order, point in enumerate(points_list):
+        col, row, _ = map(int, point)
+        red = int(255 * order/total)
+        row_lo, row_hi = max(0, row-wr), min(n_rows, row+wr+1)
+        col_lo, col_hi = max(0, col-wc), min(n_cols, col+wc+1)
+        canvas[row_lo:row_hi, col_lo:col_hi] = (red, 255 - red, 0)
+    return canvas
+# A color map for paths
+col_map = [(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255), (0,255,255),
+           (255,127,0), (255, 0, 127), (127, 255, 0), (0, 255, 127), (127,0,255), (0,127,255)]
+def draw_paths(all_paths, foci_stack, foci_index=None, r=3):
+    """
+    Draw the paths on the maximum projection of the image stack and mark the foci with crosses.
+    Args:
+        all_paths (list): List of paths where each path is a list of points.
+        foci_stack (np.array): 3D numpy array representing the image stack; projected along axis 0.
+        foci_index (list, optional): For each path, the indices (into that path) of its foci. Defaults to None.
+        r (int, optional): Half-length of the arms of the cross drawn at each focus. Defaults to 3.
+    Returns:
+        PIL.Image.Image: An RGB image with the drawn paths, path numbers and focus markers.
+    """
+    projection = np.max(foci_stack, axis=0)
+    brightest = np.max(projection)
+    # Rescale to 0-255. A stack whose maximum is 0 is left as it is: dividing by zero
+    # would put NaNs into the uint8 conversion.
+    if brightest != 0:
+        projection = projection/brightest*255
+    im = projection.astype(np.uint8)
+    im = np.dstack((im,)*3)
+    im = Image.fromarray(im)
+    draw = ImageDraw.Draw(im)
+    # Each path is drawn in its own colour and labelled with its 1-based number at its first point
+    for i, (p, col) in enumerate(zip(all_paths, cycle(col_map))):
+        draw.line([(u[0], u[1]) for u in p], fill=col)
+        draw.text((p[0][0], p[0][1]), str(i+1), fill=col)
+    if foci_index is not None:
+        for idx, p, col in zip(foci_index, all_paths, cycle(col_map)):
+            for j in idx:
+                x, y = p[j][0], p[j][1]
+                # Horizontal and vertical arm of the cross
+                draw.line((int(x-r), int(y), int(x+r), int(y)), fill=col, width=2)
+                draw.line((int(x), int(y-r), int(x), int(y+r)), fill=col, width=2)
+    return im
+def measure_from_mask(mask, measure_stack):
+    """
+    Compute the mask-weighted sum of measure_stack.
+    The result is np.sum(mask * measure_stack); for a binary mask this is the sum of the
+    measure_stack values where the mask is equal to 1.
+    Args:
+        mask (numpy.ndarray): Mask multiplied element-wise with measure_stack.
+        measure_stack (numpy.ndarray): Stack of measurements.
+    Returns:
+        Sum of the element-wise product of mask and measure_stack.
+    """
+    return np.sum(mask * measure_stack)
+# Max of mask * measure_stack (for a binary mask: the region where mask==1)
+def max_from_mask(mask, measure_stack):
+    """
+    Compute the maximum of the element-wise product of mask and measure_stack.
+    Entries where the mask is 0 enter the maximum as 0, so for a binary mask the result is
+    the largest measure_stack value under the mask, but never less than 0 if the mask
+    contains a zero.
+    Args:
+        mask (numpy.ndarray): Mask multiplied element-wise with measure_stack.
+        measure_stack (numpy.ndarray): Stack of measurements.
+    Returns:
+        Maximum of mask * measure_stack.
+    """
+    return np.max(mask * measure_stack)
+def make_mask_s(p, melem, measure_stack):
+    """
+    Cut out the part of measure_stack covered by melem when it is centred on p, clipping at the stack edges.
+    Args:
+        p (tuple): Target point (r, c, z), indexing axes 0, 1 and 2 of measure_stack.
+        melem (numpy.ndarray): Structuring element for the mask.
+        measure_stack (numpy.ndarray): Stack of measurements.
+    Returns:
+        tuple: (mask, m_data), both of shape melem.shape. m_data holds the measure_stack values
+               under the element and mask the matching part of melem; positions where the
+               element extends beyond the stack stay 0 in both.
+    """
+    # Half-width of the structuring element along each axis
+    R = [u//2 for u in melem.shape]
+    r, c, z = p
+    mask = np.zeros(melem.shape)
+    m_data = np.zeros(melem.shape)
+    s = measure_stack.shape
+    # Start (o_*) and end (e_*) indices, in element coordinates, of the part that lies inside the stack
+    o_1, o_2, o_3 = max(R[0]-r, 0), max(R[1]-c, 0), max(R[2]-z,0)
+    e_1, e_2, e_3 = min(R[0]-r+s[0], 2*R[0]+1), min(R[1]-c+s[1], 2*R[1]+1), min(R[2]-z+s[2], 2*R[2]+1)
+    # Copy the overlapping window of the stack, and the same window of the element, into the zeroed outputs
+    m_data[o_1:e_1,o_2:e_2,o_3:e_3] = measure_stack[max(r-R[0],0):min(r+R[0]+1,s[0]),max(c-R[1],0):min(c+R[1]+1,s[1]),max(z-R[2],0):min(z+R[2]+1, s[2])]
+    mask[o_1:e_1,o_2:e_2,o_3:e_3] = melem[o_1:e_1,o_2:e_2,o_3:e_3]
+    return mask, m_data
+def measure_at_point(p, melem, measure_stack, op='mean'):
+    """
+    Measure measure_stack around one point using a structuring element.
+    Args:
+        p (tuple): Target point (r, c, z); each coordinate is converted with int().
+        melem (numpy.ndarray): Structuring element for the mask.
+        measure_stack (numpy.ndarray): Stack of measurements.
+        op (str, optional): 'mean' averages over the mask; any other value takes the maximum. Default is 'mean'.
+    Returns:
+        float: Measured value based on the specified operation.
+    """
+    mask, m_data = make_mask_s(map(int, p), melem, measure_stack)
+    if op != 'mean':
+        return float(max_from_mask(mask, m_data))
+    # Mean = masked sum divided by the mask's total weight
+    return float(measure_from_mask(mask, m_data) / np.sum(mask))
+# Build the boolean ball used as measurement region
+def make_sphere(R=5, z_scale_ratio=2.3):
+    """
+    Generate a boolean array marking the voxels inside a sphere of radius R.
+    The z axis is sampled more coarsely: z offsets are multiplied by z_scale_ratio before
+    being compared with R, and the z extent is ceil(R/z_scale_ratio) voxels either side.
+    Args:
+        R (int, optional): Radius of the sphere in xy voxels. Default is 5.
+        z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.
+    Returns:
+        numpy.ndarray: Boolean array, True inside the sphere.
+    """
+    half_depth = int(math.ceil(R/z_scale_ratio))
+    xs, ys, zs = np.ogrid[-R:R+1, -R:R+1, -half_depth:half_depth+1]
+    squared_distance = xs**2 + ys**2 + (z_scale_ratio * zs)**2
+    return squared_distance <= R**2
+# Apply the spherical measurement (mean or max, selected by op) to every point of points_list in order.
+def measure_all_with_sphere(points_list, measure_stack, op='mean', R=5, z_scale_ratio=2.3):
+    """
+    Measure measure_stack around each listed point using one shared spherical region.
+    Args:
+        points_list (list): List of points (r, c, z) to be measured.
+        measure_stack (numpy.ndarray): Stack of measurements.
+        op (str, optional): Operation passed to measure_at_point; 'mean' or 'max'. Default is 'mean'.
+        R (int, optional): Radius of the sphere. Default is 5.
+        z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.
+    Returns:
+        list: One measured value per point, in the order of points_list.
+    """
+    ball = make_sphere(R, z_scale_ratio)
+    return [measure_at_point(point, ball, measure_stack, op) for point in points_list]
+# Sample the fluorescence along an ordered path with a spherical probe
+def measure_chrom2(path, hei10, config):
+    """
+    Measure fluorescence levels along an ordered skeleton.
+    The probe radius in voxels is config['sphere_radius']/config['xy_res'] rounded up, and
+    the z scaling of the probe is config['z_res']/config['xy_res'].
+    Args:
+        path (list): List of ordered path points (r, c, z).
+        hei10 (numpy.ndarray): 3D fluorescence data.
+        config (dict): Configuration dictionary containing 'z_res', 'xy_res', and 'sphere_radius' values.
+    Returns:
+        tuple: (visualisation image, mean measurements, max measurements) along the path.
+    """
+    z_to_xy = config['z_res']/config['xy_res']
+    radius_px = int(math.ceil(config['sphere_radius']/config['xy_res']))
+    ordering_image = visualise_ordering(path, dim=hei10.shape, wr=radius_px, wc=radius_px)
+    mean_values = measure_all_with_sphere(path, hei10, op='mean', R=radius_px, z_scale_ratio=z_to_xy)
+    max_values = measure_all_with_sphere(path, hei10, op='max', R=radius_px, z_scale_ratio=z_to_xy)
+    return ordering_image, mean_values, max_values
+def extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config):
+    """
+    Extract peak information from given traces and compile them into a DataFrame.
+    Args:
+    - cell_id (int or str): Identifier for the cell being analyzed.
+    - all_paths (list of lists): Contains ordered path points for multiple paths.
+    - path_lengths (list of floats): List containing lengths of each path in all_paths.
+    - measured_traces (list of lists): Contains fluorescence measurement values along the paths.
+    - config (dict): Configuration dictionary containing:
+        - 'peak_threshold': Threshold value to determine a peak in the trace.
+        - 'sphere_radius': Radius of the sphere used in fluorescence measurement.
+        - 'xy_res', 'z_res': Voxel sizes used to measure distances along each path.
+        - 'use_corrected_positions' (optional, default True): If true, a focus position is the distance
+          measured along the path up to the focus; otherwise the position returned by analyse_traces is used.
+        The whole dictionary is also passed on to analyse_traces.
+    Returns:
+    - pd.DataFrame: DataFrame containing peak information for each path.
+    - list of lists: Absolute intensities of the detected foci.
+    - list of lists: Index positions of the detected foci.
+    - list of lists: Absolute focus intensity threshold for each trace.
+    - list of numpy.ndarray: For each trace, distances of each point from start of trace in microns
+    """
+    foci_absolute_intensity, foci_position, foci_position_index, trace_median_intensities, trace_thresholds = analyse_traces(all_paths, path_lengths, measured_traces, config)
+    # Normalisation for relative intensities: summed focus intensity above the trace median, over all traces
+    total_intensity = sum(sum(path_foci_abs_int - tmi) for path_foci_abs_int, tmi in zip(foci_absolute_intensity, trace_median_intensities))
+    # Configurations without this key must not fail with a KeyError; the default matches the
+    # positions used for the plots, which are measured along the path.
+    use_corrected_positions = config.get('use_corrected_positions', True)
+    voxel_size = (config['xy_res'], config['xy_res'], config['z_res'])
+    data = []
+    trace_positions = []
+    for i, path in enumerate(all_paths):
+        pl = calculate_path_length_partials(path, voxel_size)
+        path_data = { 'Cell_ID':cell_id,
+                      'Trace': i+1,
+                      'SNT_trace_length(um)': path_lengths[i],
+                      'Measured_trace_length(um)': pl[-1],
+                      'Trace_median_intensity': trace_median_intensities[i],
+                      'Detection_sphere_radius(um)': config['sphere_radius'],
+                      'Foci_ID_threshold': config['peak_threshold'] }
+        for j, (idx, u, v) in enumerate(zip(foci_position_index[i], foci_position[i], foci_absolute_intensity[i])):
+            path_data[f'Foci_{j+1}_position(um)'] = pl[idx] if use_corrected_positions else u
+            path_data[f'Foci_{j+1}_absolute_intensity'] = v
+            path_data[f'Foci_{j+1}_relative_intensity'] = (v - trace_median_intensities[i])/total_intensity
+        data.append(path_data)
+        trace_positions.append(pl)
+    return pd.DataFrame(data), foci_absolute_intensity, foci_position_index, trace_thresholds, trace_positions
+def analyse_paths(cell_id,
+                  foci_file,
+                  traces_file,
+                  config
+    ):
+    """
+    Analyzes paths for the given cell ID using provided foci and trace files.
+    Args:
+        cell_id (int/str): Identifier for the cell.
+        foci_file (str): Path to the foci image file.
+        traces_file (str): Path to the XML traces file.
+        config (dict): Configuration dictionary containing necessary parameters such as resolutions and thresholds.
+    Returns:
+        tuple: A tuple containing an overlay image of the traces, visualization images for each trace,
+               a figure with plotted measurements, and a dataframe with extracted peaks.
+    """
+    foci_stack = tifffile.imread(foci_file)
+    # A single-plane image is treated as a stack with one slice
+    if foci_stack.ndim==2:
+        foci_stack = foci_stack[None,:,:]
+    all_paths, path_lengths = get_paths_from_traces_file(traces_file)
+    # The measurement code indexes the data in the axis order of the path points, i.e. the stack with its axes reversed
+    measure_stack = foci_stack.transpose(2,1,0)
+    all_trace_vis = []
+    all_m = []
+    for p in all_paths:
+        vis, m, _ = measure_chrom2(p, measure_stack, config)
+        all_trace_vis.append(vis)
+        all_m.append(m)
+    extracted_peaks, foci_absolute_intensity, foci_pos_index, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, all_m, config)
+    n_cols = 2
+    # Keep at least one row so that a traces file without paths still yields a (blank) figure
+    n_rows = max(1, (len(all_paths)+n_cols-1)//n_cols)
+    # squeeze=False always returns a 2D array of axes, whatever the grid size
+    fig, ax = plt.subplots(n_rows, n_cols, squeeze=False)
+    ax = ax.flatten()
+    for i, m in enumerate(all_m):
+        ax[i].set_title(f'Trace {i+1}')
+        ax[i].plot(trace_positions[i], m)
+        ax[i].set_xlabel('Distance from start (um)')
+        ax[i].set_ylabel('Intensity')
+        # Foci markers and the threshold line are only drawn for traces with detected foci
+        if len(foci_pos_index[i]):
+            ax[i].plot(trace_positions[i][foci_pos_index[i]], np.array(m)[foci_pos_index[i]], 'rx')
+            ax[i].axhline(trace_thresholds[i], c='r', ls=':')
+    # Hide the unused panels of the grid
+    for i in range(len(all_m), n_cols*n_rows):
+        ax[i].axis('off')
+    plt.tight_layout()
+    trace_overlay = draw_paths(all_paths, foci_stack, foci_index=foci_pos_index)
+    return trace_overlay, all_trace_vis, fig, extracted_peaks

path_analysis/data_preprocess.py ADDED Viewed

	@@ -0,0 +1,314 @@

+from dataclasses import dataclass
+import numpy as np
+import scipy.linalg as la
+from scipy.signal import find_peaks
+from math import ceil
+def thin_points(point_list, dmin=10, voxel_size=(1,1,1)):
+    """
+    Find points that lie closer than dmin to a point of equal or higher intensity.
+    Args:
+    - point_list (list of tuples): Each tuple contains:
+        - x (list of float): 3D coordinates of the point.
+        - intensity (float): The intensity value of the point.
+        - idx: A unique identifier or index for the point.
+    - dmin (float, optional): Minimum allowed distance between two kept points. Defaults to 10.
+    - voxel_size (tuple, optional): Per-axis scale applied to coordinate differences before the
+      distance is computed. Defaults to (1,1,1).
+    Returns:
+    - list: Identifiers (idx) of the removed points, in order of removal.
+    Notes:
+    - Distances are Euclidean (L2 norm) on the scaled coordinate differences.
+    - Of two points closer than dmin, the one with the lower intensity is removed; on a tie the
+      point met second in the scan is removed.
+    - Points already removed are ignored in later comparisons.
+    """
+    removed_points = []
+    for i, candidate in enumerate(point_list):
+        if candidate[2] in removed_points:
+            continue
+        for j, other in enumerate(point_list):
+            if j == i or other[2] in removed_points:
+                continue
+            separation = la.norm((np.array(candidate[0]) - np.array(other[0]))*np.array(voxel_size))
+            if not separation < dmin:
+                continue
+            if candidate[1] < other[1]:
+                # The candidate itself loses; nothing more to compare it with
+                removed_points.append(candidate[2])
+                break
+            removed_points.append(other[2])
+    return removed_points
+@dataclass
+class CellData(object):
+    """Represents data related to a single cell.
+    Attributes:
+        pathdata_list (list): A list of PathData objects representing the various paths associated with the cell.
+    """
+    # One PathData entry per traced path, in the order the paths were supplied
+    pathdata_list: list
+@dataclass
+class PathData(object):
+    """Represents data related to a specific path in the cell.
+    This dataclass encapsulates information about the peaks,
+    the defining points, the fluorescence values, and the path length of a specific path.
+    Attributes:
+        peaks (list): List of peaks in the path (indices of positions in points and o_hei10).
+        points (list): List of points defining the path.
+        o_hei10 (list): List of (unnormalized) fluorescence intensity values along the path.
+        SC_length (float): Length of the path.
+    """
+    peaks: list
+    points: list
+    o_hei10: list
+    SC_length: float
+def find_peaks2(v, distance=5,  prominence=0.5):
+    """
+    Find peaks in a 1D array with extended boundary handling.
+    The function pads the input array at both ends with its minimum value so that peaks at the
+    boundaries can be detected. It then identifies peaks in the extended array and maps them
+    back to the original input array.
+    Args:
+    - v (array-like): 1D input in which to find peaks.
+    - distance (int, optional): Minimum number of array elements that separate two peaks. Defaults to 5.
+    - prominence (float, optional): Minimum prominence required for a peak to be identified. Defaults to 0.5.
+    Returns:
+    - list of int: List containing the indices of the identified peaks in the original input array.
+    - dict: Properties of the identified peaks as returned by scipy.signal.find_peaks for the
+      padded array (empty for empty input).
+    Raises:
+    - RuntimeError: If a peak is reported inside the padding.
+    """
+    v = np.asarray(v)
+    # Nothing to pad or search in an empty trace
+    if v.size == 0:
+        return [], {}
+    pad = int(ceil(distance))+1
+    edge = np.ones((pad,), dtype=v.dtype)*np.min(v)
+    v_ext = np.concatenate([edge, v, edge])
+    peaks, properties = find_peaks(v_ext, distance=distance, prominence=prominence)
+    # Shift from padded coordinates back to indices of v
+    peaks = peaks - pad
+    if np.any((peaks < 0) | (peaks >= len(v))):
+        raise RuntimeError("find_peaks2: peak detected inside the padded boundary region")
+    return list(peaks), properties
+def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
+    """
+    Detect candidate peaks on every trace of a cell and package them with the path data.
+    Each trace is normalized, peaks are located with find_peaks2, peaks lying close to a
+    peak of equal or higher intensity (on any path) are discarded with thin_points, and the
+    result is stored as one PathData per path.
+    Args:
+        all_paths (list of list of tuples): A list containing paths, where each path is
+                                            represented as a list of 3D coordinate tuples.
+        path_lengths (list of float): List of path lengths corresponding to the provided paths.
+        measured_trace_fluorescence (list of list of float): A list containing fluorescence
+                                                            data corresponding to each path point.
+    Returns:
+        CellData: An object containing organized peak and path data for a given cell.
+    Note:
+        - Paths, lengths and fluorescence traces are matched by their position in the three lists.
+    """
+    detected = []
+    for points, _path_length, o_hei10 in zip(all_paths, path_lengths, measured_trace_fluorescence):
+        # Peak search runs on the trace scaled to zero mean and unit standard deviation
+        hei10_normalized = (o_hei10 - np.mean(o_hei10))/np.std(o_hei10)
+        # Candidate peaks only; they are thinned below
+        candidate_idx, _ = find_peaks2(hei10_normalized, distance=5,  prominence=0.5*np.std(hei10_normalized))
+        peaks = np.array(candidate_idx, dtype=np.int32)
+        # Heights are taken from the original, unnormalized values
+        peak_heights = [ o_hei10[u] for u in peaks ]
+        peak_points = [ points[u] for u in peaks ]
+        detected.append((peaks, peak_points, peak_heights))
+    # Pool the peaks of all paths, tagged with (path number, peak number), so that a peak can be
+    # rejected because of a brighter peak on a different path that passes nearby in space.
+    to_thin = [(peak_points[u], peak_heights[u], (k, u))
+               for k, (peaks, peak_points, peak_heights) in enumerate(detected)
+               for u in range(len(peaks))]
+    removed_points = thin_points(to_thin)
+    # Keep only the peaks that survived thinning
+    surviving = [[peaks[u] for u in range(len(peaks)) if (k, u) not in removed_points]
+                 for k, (peaks, _, _) in enumerate(detected)]
+    # Peak positions, absolute intensities and length for each path
+    pd_list = [PathData(peaks=surviving[k], points=all_paths[k], o_hei10=measured_trace_fluorescence[k], SC_length=path_lengths[k])
+               for k in range(len(all_paths))]
+    return CellData(pathdata_list=pd_list)
+alpha_max = 0.4
+# Criterion used for identifying a peak as a CO - normalized (with mean and s.d.)
+# hei10 level at or above 0.4 times the maximum peak level
+def pc(pos, v, alpha=alpha_max):
+    """
+    Identify and return positions where values in the array `v` reach a certain threshold.
+    The threshold is computed as `alpha` times the maximum value in `v`; values equal to the
+    threshold are included.
+    Args:
+    - pos (array-like): Array of positions.
+    - v (array-like): 1D array of values, e.g., intensities, one per position.
+    - alpha (float, optional): A scaling factor for the threshold. Defaults to `alpha_max`.
+    Returns:
+    - numpy.ndarray: Array of positions where corresponding values in `v` reach the threshold
+      (empty when `v` is empty).
+    """
+    pos = np.asarray(pos)
+    v = np.asarray(v)
+    # No values means no maximum to compare against, hence no selected positions
+    if v.size == 0:
+        return np.array(pos[:0])
+    idx = (v>=alpha*np.max(v))
+    return np.array(pos[idx])
+def analyse_celldata(cell_data, config):
+    """
+    Analyse the provided cell data to extract focus-related information.
+    Args:
+        cell_data (CellData): An instance of the CellData class containing path data information.
+        config (dictionary): Configuration dictionary containing 'peak_threshold' and 'threshold_type'
+                             'peak_threshold' (float) - threshold for calling peaks as foci
+                             'threshold_type' (str) = 'per-trace' or 'per-cell'
+    Returns:
+        tuple: A tuple containing five lists with one entry per path:
+            - foci_abs_intensity (list): Absolute intensities of the detected foci.
+            - foci_pos (list): Positions of the detected foci (focus index / number of points * path length).
+            - foci_pos_index (list): Indices of the detected foci along the path.
+            - trace_median_intensities (list): Median intensity of each trace.
+            - trace_thresholds (list): Absolute intensity threshold used for each trace; NaN for a
+              trace without any peak in 'per-trace' mode.
+    Raises:
+        NotImplementedError: If 'threshold_type' is neither 'per-trace' nor 'per-cell'.
+    """
+    peak_threshold = config['peak_threshold']
+    threshold_type = config['threshold_type']
+    if threshold_type not in ('per-trace', 'per-cell'):
+        raise NotImplementedError(f"Unsupported threshold_type: {threshold_type!r}")
+    foci_abs_intensity = []
+    foci_pos = []
+    foci_pos_index = []
+    trace_median_intensities = []
+    trace_thresholds = []
+    if threshold_type == 'per-cell':
+        # Largest mean-subtracted intensity found on any trace of the cell
+        max_cell_intensity = float("-inf")
+        for path_data in cell_data.pathdata_list:
+            h = np.array(path_data.o_hei10)
+            max_cell_intensity = max(max_cell_intensity, np.max(h - np.mean(h)))
+    for path_data in cell_data.pathdata_list:
+        peaks = np.array(path_data.peaks, dtype=np.int32)
+        o_hei10 = np.array(path_data.o_hei10)
+        trace_mean = np.mean(o_hei10)
+        h = o_hei10 - trace_mean
+        if threshold_type == 'per-trace':
+            # Call extracted peaks as foci if
+            # intensity - trace_mean >= peak_threshold * (trace_max_foci_intensity - trace_mean)
+            if len(peaks):
+                # Dividing by the standard deviation rescales both sides of the criterion alike
+                sig_peak_idx = pc(peaks, (h/np.std(h))[peaks], peak_threshold)
+                threshold = (1-peak_threshold)*trace_mean + peak_threshold*np.max(o_hei10[peaks])
+            else:
+                # A trace without peaks has no foci and no meaningful threshold; taking a
+                # maximum over its (empty) set of peaks would raise.
+                sig_peak_idx = peaks
+                threshold = float('nan')
+        else:
+            # Call extracted peaks as foci if
+            # intensity - trace_mean > peak_threshold * max(intensity - trace_mean) over the cell
+            sig_peak_idx = peaks[h[peaks]>peak_threshold*max_cell_intensity]
+            threshold = trace_mean + peak_threshold*max_cell_intensity
+        trace_thresholds.append(threshold)
+        foci_pos.append((sig_peak_idx/len(path_data.points))*path_data.SC_length)
+        foci_abs_intensity.append(o_hei10[sig_peak_idx])
+        foci_pos_index.append(sig_peak_idx)
+        trace_median_intensities.append(np.median(path_data.o_hei10))
+    return foci_abs_intensity, foci_pos, foci_pos_index, trace_median_intensities, trace_thresholds
+def analyse_traces(all_paths, path_lengths, measured_trace_fluorescence, config):
+    """
+    Detect peaks on every trace of a cell and call foci from them.
+    Runs process_cell_traces on the paths, lengths and fluorescence traces, then passes the
+    resulting CellData and config to analyse_celldata.
+    Returns:
+        tuple: The value returned by analyse_celldata (foci intensities, positions, position
+               indices, trace median intensities and trace thresholds).
+    """
+    cd = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+    return analyse_celldata(cd, config)

setup.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from setuptools import setup, find_packages
+setup(
+    name='path_analysis',
+    version='0.1.0',
+    description='A brief description of your package',
+    author='Your Name',
+    author_email='youremail@example.com',
+    url='https://github.com/yourusername/yourrepository',  # if you have a repo for the project
+    packages=find_packages(),  # or specify manually: ['your_package', 'your_package.submodule', ...]
+    install_requires=[
+        'numpy',  # for example, if your package needs numpy
+        'gradio',
+        # ... other dependencies
+    ],
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Developers',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        # ... other classifiers
+    ],
+    python_requires='>=3.6',  # your project's Python version requirement
+    keywords='some keywords related to your project',
+    # ... other parameters
+)

tests/__init__.py ADDED Viewed

File without changes

tests/test_analyse.py ADDED Viewed

	@@ -0,0 +1,223 @@

+from path_analysis.analyse import *
+import numpy as np
+from math import pi
+import xml.etree.ElementTree as ET
+def test_get_paths_from_traces_file():
+    # Mock the XML traces file content
+    xml_content = '''<?xml version="1.0"?>
+    <root>
+        <path reallength="5.0">
+            <point x="1" y="2" z="3"/>
+            <point x="4" y="5" z="6"/>
+        </path>
+        <path reallength="10.0">
+            <point x="7" y="8" z="9"/>
+            <point x="10" y="11" z="12"/>
+        </path>
+    </root>
+    '''
+    # Create a temporary XML file
+    with open("temp_traces.xml", "w") as f:
+        f.write(xml_content)
+    all_paths, path_lengths = get_paths_from_traces_file("temp_traces.xml")
+    expected_paths = [[(1, 2, 3), (4, 5, 6)], [(7, 8, 9), (10, 11, 12)]]
+    expected_lengths = [5.0, 10.0]
+    assert all_paths == expected_paths, f"Expected paths {expected_paths}, but got {all_paths}"
+    assert path_lengths == expected_lengths, f"Expected lengths {expected_lengths}, but got {path_lengths}"
+    # Clean up temporary file
+    import os
+    os.remove("temp_traces.xml")
+def test_measure_chrom2():
+    # Mock data
+    path = [(2, 3, 4), (4, 5, 6), (9, 9, 9)]  # Sample ordered path points
+    hei10 = np.random.rand(10, 10, 10)  # Random 3D fluorescence data
+    config = {
+        'z_res': 1,
+        'xy_res': 0.5,
+        'sphere_radius': 2.5
+    }
+    # Function call
+    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
+    # Assertions
+    assert len(measurements) == len(path), "Measurements length should match path length"
+    assert len(measurements_max) == len(path), "Max measurements length should match path length"
+    assert all(0 <= val <= 1 for val in measurements), "All mean measurements should be between 0 and 1 for this mock data"
+    assert all(0 <= val <= 1 for val in measurements_max), "All max measurements should be between 0 and 1 for this mock data"
+def test_measure_chrom2_z():
+    # Mock data
+    path = [(2, 3, 4), (4, 5, 6)]  # Sample ordered path points
+    _,_,hei10 = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
+    config = {
+        'z_res': 1,
+        'xy_res': 0.5,
+        'sphere_radius': 2.5
+    }
+    # Function call
+    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
+    # Assertions
+    assert len(measurements) == len(path), "Measurements length should match path length"
+    assert len(measurements_max) == len(path), "Max measurements length should match path length"
+    assert all(measurements == np.array([4,6]))
+    assert all(measurements_max == np.array([6,8]))
+def test_measure_chrom2_z2():
+    # Mock data
+    path = [(0,0,0), (2, 3, 4), (4, 5, 6)]  # Sample ordered path points
+    _,_,hei10 = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
+    config = {
+        'z_res': 0.25,
+        'xy_res': 0.5,
+        'sphere_radius': 2.5
+    }
+    # Function call
+    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
+    # Assertions
+    assert len(measurements) == len(path), "Measurements length should match path length"
+    assert len(measurements_max) == len(path), "Max measurements length should match path length"
+    assert all(measurements_max == np.array([9,9,9]))
+def test_measure_from_mask():
+    mask = np.array([
+        [0, 1, 0],
+        [1, 1, 1],
+        [0, 1, 0]
+    ])
+    measure_stack = np.array([
+        [2, 4, 2],
+        [4, 8, 4],
+        [2, 4, 2]
+    ])
+    result = measure_from_mask(mask, measure_stack)
+    assert result == 24  # Expected sum: 4+4+8+4+4
+def test_max_from_mask():
+    mask = np.array([
+        [0, 1, 0],
+        [1, 1, 1],
+        [0, 1, 0]
+    ])
+    measure_stack = np.array([
+        [2, 5, 2],
+        [4, 8, 3],
+        [2, 7, 2]
+    ])
+    result = max_from_mask(mask, measure_stack)
+    assert result == 8  # Expected max: 8
+def test_measure_at_point_mean():
+    measure_stack = np.array([
+        [[2, 2, 2, 0], [4, 4, 6, 0], [3, 3, 2, 0], [0, 0, 0, 0]],
+        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
+        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
+        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+    ])
+    p = (1, 1, 1)
+    melem = np.ones((3, 3, 3))
+    result = measure_at_point(p, melem, measure_stack, op='mean')
+    assert result == 4, "Expected mean: 4"
+def test_measure_at_point_mean_off1():
+    measure_stack = np.array([
+        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
+        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
+        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
+        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+    ])
+    p = (0, 0, 0)
+    melem = np.ones((3, 3, 3))
+    result = measure_at_point(p, melem, measure_stack, op='mean')
+    assert result == 4.5,  "Expected mean: 4.5"
+def test_measure_at_point_mean_off2():
+    measure_stack = np.array([
+        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
+        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
+        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
+        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+    ])
+    p = (3, 1, 1)
+    melem = np.ones((3, 3, 3))
+    print(measure_stack[p[0], p[1], p[2]])
+    result = measure_at_point(p, melem, measure_stack, op='mean')
+    assert result == 32/18  # Expected mean: 4.5
+def test_measure_at_point_mean_off3():
+    measure_stack = np.array([
+        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
+        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
+        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
+        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+    ])
+    p = (3, 1, 1)
+    melem = np.ones((1, 1, 3))
+    print(measure_stack[p[0], p[1], p[2]])
+    result = measure_at_point(p, melem, measure_stack, op='mean')
+    assert result == 0,  "Expected mean: 4.5"
+def test_measure_at_point_mean_off3():
+    measure_stack = np.array([
+        [[2, 2, 2, 0], [4, 4, 6, 0], [5, 5, 2, 0], [0, 0, 0, 0]],
+        [[4, 4, 4, 0], [8, 8, 8, 0], [4, 4, 4, 0], [0, 0, 0, 0]],
+        [[3, 3, 3, 0], [6, 6, 4, 0], [3, 2, 2, 0], [0, 0, 0, 0]],
+        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+    ])
+    p = (3, 1, 1)
+    melem = np.ones((3, 1, 1))
+    print(measure_stack[p[0], p[1], p[2]])
+    result = measure_at_point(p, melem, measure_stack, op='mean')
+    assert result == 3, "Expected mean: 4.5"
+def test_measure_at_point_max():
+    measure_stack = np.array([
+        [[2, 2, 2], [4, 4, 4], [2, 2, 2]],
+        [[4, 5, 4], [8, 7, 9], [4, 4, 4]],
+        [[2, 2, 2], [4, 4, 4], [2, 2, 2]]
+    ])
+    p = (1, 1, 1)
+    melem = np.ones((3, 3, 3))
+    result = measure_at_point(p, melem, measure_stack, op='max')
+    assert result == 9, "Expected max: 9"
+def test_make_sphere_equal():
+    R = 5
+    z_scale_ratio = 1.0
+    sphere = make_sphere(R, z_scale_ratio)
+    # Check the returned type
+    assert isinstance(sphere, np.ndarray), "Output should be a numpy ndarray"
+    # Check the shape
+    expected_shape = (2*R+1, 2*R+1, 2*R+1)
+    assert sphere.shape == expected_shape, f"Expected shape {expected_shape}, but got {sphere.shape}"
+    assert (sphere[:,:,::-1] == sphere).all(), f"Expected symmetrical mask"
+    assert (sphere[:,::-1,:] == sphere).all(), f"Expected symmetrical mask"
+    assert (sphere[::-1,:,:] == sphere).all(), f"Expected symmetrical mask"
+    assert abs(np.sum(sphere)-4/3*pi*R**3)<10, f"Expected approximate volume to be correct"
+    assert (sphere[R,R,0] == 1), f"Expected centre point on top plane to be within sphere"
+    assert (sphere[R+1,R,0] == 0), f"Expected point next to centre on top plane to be outside sphere"

tests/test_preprocess.py ADDED Viewed

	@@ -0,0 +1,139 @@

+from path_analysis.data_preprocess import *
+import numpy as np
+import pytest
+def test_thin_points():
+    # Define a sample point list
+    points = [
+        ([0, 0, 0], 10, 0),
+        ([1, 1, 1], 8, 1),
+        ([10, 10, 10], 12, 2),
+        ([10.5, 10.5, 10.5], 5, 3),
+        ([20, 20, 20], 15, 4)
+    ]
+    # Call the thin_points function with dmin=5 (for example)
+    removed_indices = thin_points(points, dmin=5)
+    # Check results
+    # Point at index 1 ([1, 1, 1]) should be removed since it's within 5 units distance of point at index 0 and has lower intensity.
+    # Similarly, point at index 3 ([10.5, 10.5, 10.5]) should be removed as it's close to point at index 2 and has lower intensity.
+    assert set(removed_indices) == {1, 3}
+    # Another simple test to check if function does nothing when points are far apart
+    far_points = [
+        ([0, 0, 0], 10, 0),
+        ([100, 100, 100], 12, 1),
+        ([200, 200, 200], 15, 2)
+    ]
+    removed_indices_far = thin_points(far_points, dmin=5)
+    assert len(removed_indices_far) == 0  # Expect no points to be removed
+def test_find_peaks2():
+    # Basic test
+    data = np.array([0, 0, 0, 0, 0, 0, 5, 0, 3, 0])
+    peaks, _ = find_peaks2(data)
+    assert set(peaks) == {6}  # Expected peaks at positions 6
+    # Basic test
+    data = np.array([0, 2, 0, 0, 0, 0, 0, 0, 0, 0])
+    peaks, _ = find_peaks2(data)
+    assert set(peaks) == {1}  # Expected peaks at positions 1
+    # Test with padding impacting peak detection
+    data = np.array([3, 2.9, 0, 0, 0, 3])
+    peaks, _ = find_peaks2(data)
+    assert set(peaks) == {0,5}  # Peaks at both ends
+    # Test with close peaks
+    data = np.array([3, 0, 3])
+    peaks, _ = find_peaks2(data)
+    assert set(peaks) == {2}  # Peak at right end only
+    # Test with close peaks
+    # Test with close peaks
+    data = np.array([3, 0, 3])
+    peaks, _ = find_peaks2(data, distance=1)
+    assert set(peaks) == {0,2}  # Peaks at both ends
+    # Test with close peaks
+    data = np.array([0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3])
+    peaks, _ = find_peaks2(data, distance=1)
+    assert set(peaks) == {2,7}  # Peak at centre (rounded to the left) of groups of maximum values
+    # Test with prominence threshold
+    data = np.array([0, 1, 0, 0.4, 0])
+    peaks, _ = find_peaks2(data, prominence=0.5)
+    assert peaks == [1]  # Only the peak at position 1 meets the prominence threshold
+def test_pc():
+    pos = np.array([0, 1, 2, 3, 4, 6])
+    values = np.array([0.1, 0.5, 0.2, 0.8, 0.3, 0.9])
+    # Basic test
+    assert np.array_equal(pc(pos, values), np.array([1, 3, 6]))  # only values 0.8 and 0.9 exceed 0.4 times the max (which is 0.9)
+    # Test with custom alpha
+    assert np.array_equal(pc(pos, values, alpha=0.5), np.array([1, 3, 6]))
+    # Test with a larger alpha
+    assert np.array_equal(pc(pos, values, alpha=1.0), [6])  # No values exceed the maximum value itself
+    # Test with all values below threshold
+    values = np.array([0.1, 0.2, 0.3, 0.4])
+    assert np.array_equal(pc(pos[:4], values), [1,2,3])  # All values are below 0.4 times the max (which is 0.4)
+@pytest.fixture
+def mock_data():
+    all_paths = [ [ (0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], [ (1,20,0), (1,20,10), (1,20,20)  ] ] # Mock paths
+    path_lengths = [ 2.2, 2.3 ]  # Mock path lengths
+    measured_trace_fluorescence = [ [100, 8, 3, 2, 3, 39], [38, 2, 20] ]  # Mock fluorescence data
+    return all_paths, path_lengths, measured_trace_fluorescence
+def test_process_cell_traces_return_type(mock_data):
+    all_paths, path_lengths, measured_trace_fluorescence = mock_data
+    result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+    assert isinstance(result, CellData), f"Expected CellData but got {type(result)}"
+def test_process_cell_traces_pathdata_list_length(mock_data):
+    all_paths, path_lengths, measured_trace_fluorescence = mock_data
+    result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+    assert len(result.pathdata_list) == len(all_paths), f"Expected {len(all_paths)} but got {len(result.pathdata_list)}"
+def test_process_cell_traces_pathdata_path_lengths(mock_data):
+    all_paths, path_lengths, measured_trace_fluorescence = mock_data
+    result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+    path_lengths = [p.SC_length for p in result.pathdata_list]
+    expected_path_lengths = [2.2, 2.3]
+    assert  path_lengths == expected_path_lengths, f"Expected {expected_path_lengths} but got {path_lengths}"
+def test_process_cell_traces_peaks(mock_data):
+    all_paths, path_lengths, measured_trace_fluorescence = mock_data
+    result = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
+    peaks = [p.peaks for p in result.pathdata_list]
+    assert peaks == [[0,5],[]]
+# Mock data
+@pytest.fixture
+def mock_celldata():
+    pathdata1 = PathData(peaks=[0, 5], points=[(0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], o_hei10=[100, 8, 3, 2, 3, 39], SC_length=2.2)
+    pathdata2 = PathData(peaks=[0], points=[(1,20,0), (1,20,10), (1,20,20) ], o_hei10=[38, 2, 20], SC_length=2.3)
+    return CellData(pathdata_list=[pathdata1, pathdata2])
+def test_analyse_celldata_output_length(mock_celldata):
+    rel_intensity, pos, pos_index, trace_median_intensity, trace_thresholds = analyse_celldata(mock_celldata, {'peak_threshold': 0.4, 'threshold_type':'per-trace'})
+    assert len(rel_intensity) == len(mock_celldata.pathdata_list), "Mismatch in relative intensities length"
+    assert len(pos) == len(mock_celldata.pathdata_list), "Mismatch in positions length"
+    assert len(pos_index) == len(mock_celldata.pathdata_list), "Mismatch in position indices length"