{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "preprocess_segmentation_data.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "13bd72b174514731a454a78c974c2c1d": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_b065ac27ed364e0fb649c75447805bda", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_a4826465021640ea8740ef1d4e05ddb4", "IPY_MODEL_724664ae5e0747e38415b6fa11e5ad98" ] } }, "b065ac27ed364e0fb649c75447805bda": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "a4826465021640ea8740ef1d4e05ddb4": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_061c4359345b40f7a5aaeaa8756b2e63", "_dom_classes": [], "description": "100%", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 95, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 95, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_fb77e99a93ea49fb82fd7e1b94ecc3fe" } }, "724664ae5e0747e38415b6fa11e5ad98": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_c3410d1ce72c45e4bd58ee3fae6e1606", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 95/95 [00:57<00:00, 1.65it/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_ab2dc9da8ded4eb595a9a81b07ee62da" } }, "061c4359345b40f7a5aaeaa8756b2e63": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "initial", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "fb77e99a93ea49fb82fd7e1b94ecc3fe": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "c3410d1ce72c45e4bd58ee3fae6e1606": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "ab2dc9da8ded4eb595a9a81b07ee62da": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "iKoZh0oBVyb0" }, "source": [ "The goal of this notebook is to preprocess data and save it as `.png` files\n", "\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "r6q336bZ2YZf" }, "source": [ "Images - [Data source](https://ndownloader.figshare.com/files/14487263?private_link=d932c564d5cdd7186679)\n", "\n", "Masks - [Data source](https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/14487260/FinalResults3DMMS.zip)" ] }, { "cell_type": "code", "metadata": { "id": "S-mXanEJ0xhM" }, "source": [ "!wget https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/14487263/RawMembraneImage.zip\n", "!unzip /content/RawMembraneImage.zip\n", "!wget https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/14487260/FinalResults3DMMS.zip\n", "!unzip /content/FinalResults3DMMS.zip" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "qeSAE3aq2DYl" }, "source": [ "import scipy.io\n", "import os\n", "import matplotlib.pyplot as plt \n", "from IPython.display import clear_output\n", "import numpy as np\n", "import cv2\n", "from tqdm.notebook import tqdm\n", "import pandas as pd \n", "from PIL import Image" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "iF0u-jt12Izp" }, "source": [ "all_paths = os.listdir(\"/content/170704plc1p2/\")\n", "all_paths_np = np.array(all_paths)\n", "bools_image_paths = [all_paths[i][0] == \"m\" for i in range(len(all_paths))]\n", "bools_seg_paths = [all_paths[i][0] == \"T\" for i in range(len(all_paths))]\n", "image_paths = all_paths_np[bools_image_paths]\n", "seg_paths = all_paths_np[bools_seg_paths]\n", "image_paths = all_paths_np[bools_image_paths]\n", "seg_paths = all_paths_np[bools_seg_paths]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "OkFoJeYi3a-1" }, "source": [ "def pair_memb_seg_lists(memb, seg):\n", " memb_fix, seg_fix=[],[]\n", " for i in memb:\n", " for j in seg:\n", " if i[5:8] == j[1:4]:\n", " memb_fix.append(i)\n", " seg_fix.append(j)\n", " return(memb_fix, seg_fix)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DuH9pLAC2mEG", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "adead18e-83d0-4050-a154-5db8acea82d6" }, "source": [ "images_fix, seg_fix = pair_memb_seg_lists(image_paths, seg_paths)\n", "mat = scipy.io.loadmat(\"/content/170704plc1p2/\" + images_fix[10])\n", "seg_np = scipy.io.loadmat(\"/content/170704plc1p2/\" + seg_fix[10])\n", "\n", "image = (mat[\"embryo\"])\n", "segmented = seg_np[\"membSeg\"]\n", "segmented.shape,image.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "((205, 285, 134), (256, 356, 70))" ] }, "metadata": { "tags": [] }, "execution_count": 10 } ] }, { "cell_type": "code", "metadata": { "id": "4gBYSn6Puy21" }, "source": [ "def show_n_channel_image(n_channels, image):\n", " for i in range(n_channels):\n", " clear_output(wait=True)\n", " plt.imshow(image[:,:,i])\n", " plt.show()\n", "\n", "show_n_channel_image(134, segmented)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "e63m5G-Hu5UD" }, "source": [ "show_n_channel_image(70, image)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "rE48cWkrvGKh" }, "source": [ "seg_mv = np.moveaxis(segmented, 2,0)\n", "\n", "def partition(lst, n):\n", " increment = len(lst) / float(n)\n", " last = 0\n", " i = 1\n", " results = []\n", " while last < len(lst):\n", " idx = int(round(increment * i))\n", " results.append(lst[last:idx])\n", " last = idx\n", " i += 1\n", " return results\n", "\n", "slices = partition(seg_mv, 70)\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "6DZy7Y5RvGIB" }, "source": [ "def show_images(img_list):\n", " len_x = len(img_list)\n", " fig, ax = plt.subplots(1,len_x, figsize = (10,3))\n", " for i in range(len(img_list)):\n", " ax[i].imshow(img_list[i])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "FA4jxK9f4iYT" }, "source": [ "def prepare_segmentation_data(data_foldername = \"170704plc1p2\", save_folder = \"training_data_pil\"):\n", " metadata = []\n", " \n", " if os.path.isdir(save_folder) is not True:\n", " os.mkdir(save_folder)\n", " os.mkdir(save_folder + \"/\" + \"masks\")\n", " os.mkdir(save_folder + \"/\" + \"images\")\n", " \n", " \n", " all_paths = os.listdir(data_foldername)\n", " all_paths_np = np.array(all_paths)\n", " bools_image_paths = [all_paths[i][0] == \"m\" for i in range(len(all_paths))]\n", " bools_seg_paths = [all_paths[i][0] == \"T\" for i in range(len(all_paths))]\n", " image_paths = all_paths_np[bools_image_paths]\n", " seg_paths = all_paths_np[bools_seg_paths]\n", " images_fix, seg_fix = pair_memb_seg_lists(image_paths, seg_paths)\n", "\n", " \n", " for i in tqdm(range(len(images_fix))):\n", " img_mat = scipy.io.loadmat(data_foldername + \"/\" + images_fix[i])\n", " seg_mat = scipy.io.loadmat(data_foldername + \"/\" + seg_fix[i])\n", "\n", " images = mat[\"embryo\"]\n", " segs = seg_mat[\"membSeg\"]\n", " \n", " images_mv = np.moveaxis(images, 2,0)\n", " seg_mv = np.moveaxis(segmented, 2,0)\n", " \n", " slices = partition(seg_mv, 70)\n", " \n", " for j in range(9,58,1): ## exclude black or mostly black images \n", " image_name = \"real_\" + str(i)+\"_\"+str(j)+ \".png\"\n", "\n", " image = images_mv[j]\n", " masks = slices[j]\n", " im = Image.fromarray(image)\n", " im.save(save_folder + \"/images/\" + image_name, 'PNG', quality=100)\n", " # plt.imshow(image)\n", " # plt.show()\n", "# print(save_folder + \"/images/\" + image_name)\n", " \n", " for k in range(len(masks)):\n", " mask_name = \"mask_\" + str(i) + \"_\" + str(j) + \"_\" + str(k) + \".png\"\n", " #cv2.imwrite(save_folder + \"/masks/\" + mask_name, masks[k])\n", " msk = Image.fromarray(masks[k])\n", " # plt.imshow(masks[k])\n", " # plt.show()\n", " msk.save(save_folder + \"/masks/\" + mask_name, 'PNG', quality=100)\n", " metadata.append([save_folder + \"/images/\" + image_name, save_folder + \"/masks/\" + mask_name])\n", " return metadata" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 65, "referenced_widgets": [ "13bd72b174514731a454a78c974c2c1d", "b065ac27ed364e0fb649c75447805bda", "a4826465021640ea8740ef1d4e05ddb4", "724664ae5e0747e38415b6fa11e5ad98", "061c4359345b40f7a5aaeaa8756b2e63", "fb77e99a93ea49fb82fd7e1b94ecc3fe", "c3410d1ce72c45e4bd58ee3fae6e1606", "ab2dc9da8ded4eb595a9a81b07ee62da" ] }, "id": "_29eWU5M4jYE", "outputId": "2a5467d4-f382-4469-83d4-d3a2511ac931" }, "source": [ "metadata = prepare_segmentation_data(data_foldername = \"170704plc1p2\", save_folder = \"training_data_pil\")" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "13bd72b174514731a454a78c974c2c1d", "version_minor": 0, "version_major": 2 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=95.0), HTML(value='')))" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 202 }, "id": "tdpuKqWT5DpR", "outputId": "c3a33cea-979c-44af-8e29-b008bb400668" }, "source": [ "df = pd.DataFrame(metadata, columns = [\"image\", \"mask\"])\n", "df.to_csv(\"metadata_pil.csv\", index = False )\n", "df.tail()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", " | image | \n", "mask | \n", "
---|---|---|
8925 | \n", "training_data_pil/images/real_94_55.png | \n", "training_data_pil/masks/mask_94_55_1.png | \n", "
8926 | \n", "training_data_pil/images/real_94_56.png | \n", "training_data_pil/masks/mask_94_56_0.png | \n", "
8927 | \n", "training_data_pil/images/real_94_56.png | \n", "training_data_pil/masks/mask_94_56_1.png | \n", "
8928 | \n", "training_data_pil/images/real_94_57.png | \n", "training_data_pil/masks/mask_94_57_0.png | \n", "
8929 | \n", "training_data_pil/images/real_94_57.png | \n", "training_data_pil/masks/mask_94_57_1.png | \n", "